diff --git a/.gitignore b/.gitignore
index 3d8e148..53cb367 100644
--- a/.gitignore
+++ b/.gitignore
@@ -30,6 +30,7 @@ CLUnitTests/test.cl
 [Dd]ebug/
 [Dd]ebugPublic/
 [Rr]elease/
+[Pp]erformance Release/
 [Rr]eleases/
 x64/
 x86/
diff --git a/AddrGen/AddrGen.vcxproj b/AddrGen/AddrGen.vcxproj
index d7a25e6..bf50ce3 100644
--- a/AddrGen/AddrGen.vcxproj
+++ b/AddrGen/AddrGen.vcxproj
@@ -28,26 +28,26 @@
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
+    <PlatformToolset>ClangCL</PlatformToolset>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
+    <PlatformToolset>ClangCL</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v142</PlatformToolset>
+    <PlatformToolset>ClangCL</PlatformToolset>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v142</PlatformToolset>
+    <PlatformToolset>ClangCL</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
@@ -103,7 +103,7 @@
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <ClCompile>
       <PrecompiledHeader>NotUsing</PrecompiledHeader>
-      <WarningLevel>Level3</WarningLevel>
+      <WarningLevel>EnableAllWarnings</WarningLevel>
       <Optimization>Disabled</Optimization>
       <PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <AdditionalIncludeDirectories>$(SolutionDir)secp256k1lib;$(SolutionDir)util;$(SolutionDir)AddressUtil;$(SolutionDir)CmdParse;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
diff --git a/AddrGen/main.cpp b/AddrGen/main.cpp
index 7becf5d..f5a1089 100644
--- a/AddrGen/main.cpp
+++ b/AddrGen/main.cpp
@@ -1,5 +1,5 @@
 #include <iostream>
-#include<string>
+#include <string>
 #include "secp256k1.h"
 #include "util.h"
 #include "AddressUtil.h"
diff --git a/AddressUtil/AddressUtil.h b/AddressUtil/AddressUtil.h
index 14aab13..0879501 100644
--- a/AddressUtil/AddressUtil.h
+++ b/AddressUtil/AddressUtil.h
@@ -1,35 +1,31 @@
-#ifndef _ADDRESS_UTIL_H
-#define _ADDRESS_UTIL_H
+#ifndef ADDRESS_UTIL_H
+#define ADDRESS_UTIL_H
 
 #include "secp256k1.h"
 
 namespace Address {
 	std::string fromPublicKey(const secp256k1::ecpoint &p, bool compressed = false);
 	bool verifyAddress(std::string address);
-};
+}
 
 namespace Base58 {
-	std::string toBase58(const secp256k1::uint256 &x);
+	std::string toBase58(secp256k1::uint256 value);
 	secp256k1::uint256 toBigInt(const std::string &s);
 	void getMinMaxFromPrefix(const std::string &prefix, secp256k1::uint256 &minValueOut, secp256k1::uint256 &maxValueOut);
 
 	void toHash160(const std::string &s, unsigned int hash[5]);
 
-	bool isBase58(std::string s);
-};
-
-
+	bool isBase58(const std::string &value);
+}
 
 namespace Hash {
 
-
 	void hashPublicKey(const secp256k1::ecpoint &p, unsigned int *digest);
 	void hashPublicKeyCompressed(const secp256k1::ecpoint &p, unsigned int *digest);
 
 	void hashPublicKey(const unsigned int *x, const unsigned int *y, unsigned int *digest);
 	void hashPublicKeyCompressed(const unsigned int *x, const unsigned int *y, unsigned int *digest);
 
-};
-
+}
 
-#endif
\ No newline at end of file
+#endif
diff --git a/AddressUtil/AddressUtil.vcxproj b/AddressUtil/AddressUtil.vcxproj
index c2e6a66..df6aee8 100644
--- a/AddressUtil/AddressUtil.vcxproj
+++ b/AddressUtil/AddressUtil.vcxproj
@@ -5,6 +5,14 @@
       <Configuration>Debug</Configuration>
       <Platform>Win32</Platform>
     </ProjectConfiguration>
+    <ProjectConfiguration Include="Performance Release|Win32">
+      <Configuration>Performance Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Performance Release|x64">
+      <Configuration>Performance Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
     <ProjectConfiguration Include="Release|Win32">
       <Configuration>Release</Configuration>
       <Platform>Win32</Platform>
@@ -37,32 +45,46 @@
     <ProjectGuid>{34042455-D274-432D-9134-C9EA41FD1B54}</ProjectGuid>
     <Keyword>Win32Proj</Keyword>
     <RootNamespace>AddressUtil</RootNamespace>
-    <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
+    <WindowsTargetPlatformVersion>10.0.19041.0</WindowsTargetPlatformVersion>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
+    <PlatformToolset>ClangCL</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>ClangCL</PlatformToolset>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
+    <PlatformToolset>ClangCL</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v142</PlatformToolset>
+    <PlatformToolset>ClangCL</PlatformToolset>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>ClangCL</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PreferredToolArchitecture>x64</PreferredToolArchitecture>
+  </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v142</PlatformToolset>
+    <PlatformToolset>ClangCL</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
@@ -75,6 +97,10 @@
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
     <Import Project="..\BitCrack.props" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="..\BitCrack.props" />
+  </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
     <Import Project="..\BitCrack.props" />
@@ -83,6 +109,10 @@
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
     <Import Project="..\BitCrack.props" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="..\BitCrack.props" />
+  </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
     <Import Project="..\BitCrack.props" />
@@ -102,10 +132,23 @@
       <SubSystem>Windows</SubSystem>
     </Link>
   </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>MaxSpeed</Optimization>
+      <PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>$(SolutionDir)secp256k1lib;$(SolutionDir)CryptoUtil;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <ClCompile>
       <PrecompiledHeader>NotUsing</PrecompiledHeader>
-      <WarningLevel>Level3</WarningLevel>
+      <WarningLevel>EnableAllWarnings</WarningLevel>
       <Optimization>Disabled</Optimization>
       <PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <AdditionalIncludeDirectories>$(SolutionDir)secp256k1lib;$(SolutionDir)CryptoUtil;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
@@ -114,6 +157,25 @@
       <SubSystem>Windows</SubSystem>
     </Link>
   </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|x64'">
+    <ClCompile>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>$(SolutionDir)secp256k1lib;$(SolutionDir)CryptoUtil;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <DebugInformationFormat>None</DebugInformationFormat>
+      <Optimization>MaxSpeed</Optimization>
+      <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <ClCompile>
       <WarningLevel>Level3</WarningLevel>
diff --git a/AddressUtil/Base58.cpp b/AddressUtil/Base58.cpp
index 2d80ef3..5c7cdda 100644
--- a/AddressUtil/Base58.cpp
+++ b/AddressUtil/Base58.cpp
@@ -1,28 +1,146 @@
 #include <map>
 #include "CryptoUtil.h"
-
 #include "AddressUtil.h"
 
-
-static const std::string BASE58_STRING = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz";
-
 struct Base58Map {
-	static std::map<char, int> createBase58Map()
+	static std::map<char, unsigned int> createBase58OrdinalMap()
 	{
-		std::map<char, int> m;
-		for(int i = 0; i < 58; i++) {
-			m[BASE58_STRING[i]] = i;
-		}
+		std::map<char, unsigned int> m;
+
+		m.insert(std::pair<char, unsigned int>('1', 0));
+		m.insert(std::pair<char, unsigned int>('2', 1));
+		m.insert(std::pair<char, unsigned int>('3', 2));
+		m.insert(std::pair<char, unsigned int>('4', 3));
+		m.insert(std::pair<char, unsigned int>('5', 4));
+		m.insert(std::pair<char, unsigned int>('6', 5));
+		m.insert(std::pair<char, unsigned int>('7', 6));
+		m.insert(std::pair<char, unsigned int>('8', 7));
+		m.insert(std::pair<char, unsigned int>('9', 8));
+		m.insert(std::pair<char, unsigned int>('A', 9));
+		m.insert(std::pair<char, unsigned int>('B', 10));
+		m.insert(std::pair<char, unsigned int>('C', 11));
+		m.insert(std::pair<char, unsigned int>('D', 12));
+		m.insert(std::pair<char, unsigned int>('E', 13));
+		m.insert(std::pair<char, unsigned int>('F', 14));
+		m.insert(std::pair<char, unsigned int>('G', 15));
+		m.insert(std::pair<char, unsigned int>('H', 16));
+		m.insert(std::pair<char, unsigned int>('J', 17));
+		m.insert(std::pair<char, unsigned int>('K', 18));
+		m.insert(std::pair<char, unsigned int>('L', 19));
+		m.insert(std::pair<char, unsigned int>('M', 20));
+		m.insert(std::pair<char, unsigned int>('N', 21));
+		m.insert(std::pair<char, unsigned int>('P', 22));
+		m.insert(std::pair<char, unsigned int>('Q', 23));
+		m.insert(std::pair<char, unsigned int>('R', 24));
+		m.insert(std::pair<char, unsigned int>('S', 25));
+		m.insert(std::pair<char, unsigned int>('T', 26));
+		m.insert(std::pair<char, unsigned int>('U', 27));
+		m.insert(std::pair<char, unsigned int>('V', 28));
+		m.insert(std::pair<char, unsigned int>('W', 29));
+		m.insert(std::pair<char, unsigned int>('X', 30));
+		m.insert(std::pair<char, unsigned int>('Y', 31));
+		m.insert(std::pair<char, unsigned int>('Z', 32));
+		m.insert(std::pair<char, unsigned int>('a', 33));
+		m.insert(std::pair<char, unsigned int>('b', 34));
+		m.insert(std::pair<char, unsigned int>('c', 35));
+		m.insert(std::pair<char, unsigned int>('d', 36));
+		m.insert(std::pair<char, unsigned int>('e', 37));
+		m.insert(std::pair<char, unsigned int>('f', 38));
+		m.insert(std::pair<char, unsigned int>('g', 39));
+		m.insert(std::pair<char, unsigned int>('h', 40));
+		m.insert(std::pair<char, unsigned int>('i', 41));
+		m.insert(std::pair<char, unsigned int>('j', 42));
+		m.insert(std::pair<char, unsigned int>('k', 43));
+		m.insert(std::pair<char, unsigned int>('m', 44));
+		m.insert(std::pair<char, unsigned int>('n', 45));
+		m.insert(std::pair<char, unsigned int>('o', 46));
+		m.insert(std::pair<char, unsigned int>('p', 47));
+		m.insert(std::pair<char, unsigned int>('q', 48));
+		m.insert(std::pair<char, unsigned int>('r', 49));
+		m.insert(std::pair<char, unsigned int>('s', 50));
+		m.insert(std::pair<char, unsigned int>('t', 51));
+		m.insert(std::pair<char, unsigned int>('u', 52));
+		m.insert(std::pair<char, unsigned int>('v', 53));
+		m.insert(std::pair<char, unsigned int>('w', 54));
+		m.insert(std::pair<char, unsigned int>('x', 55));
+		m.insert(std::pair<char, unsigned int>('y', 56));
+		m.insert(std::pair<char, unsigned int>('z', 57));
 
 		return m;
 	}
 
-	static std::map<char, int> myMap;
-};
+	static std::map<unsigned int, char> createBase58ReverseMap()
+	{
+		std::map<unsigned int, char> m;
+
+		m.insert(std::pair<unsigned int, char>( 0, '1'));
+		m.insert(std::pair<unsigned int, char>( 1, '2'));
+		m.insert(std::pair<unsigned int, char>( 2, '3'));
+		m.insert(std::pair<unsigned int, char>( 3, '4'));
+		m.insert(std::pair<unsigned int, char>( 4, '5'));
+		m.insert(std::pair<unsigned int, char>( 5, '6'));
+		m.insert(std::pair<unsigned int, char>( 6, '7'));
+		m.insert(std::pair<unsigned int, char>( 7, '8'));
+		m.insert(std::pair<unsigned int, char>( 8, '9'));
+		m.insert(std::pair<unsigned int, char>( 9, 'A'));
+		m.insert(std::pair<unsigned int, char>( 10, 'B'));
+		m.insert(std::pair<unsigned int, char>( 11, 'C'));
+		m.insert(std::pair<unsigned int, char>( 12, 'D'));
+		m.insert(std::pair<unsigned int, char>( 13, 'E'));
+		m.insert(std::pair<unsigned int, char>( 14, 'F'));
+		m.insert(std::pair<unsigned int, char>( 15, 'G'));
+		m.insert(std::pair<unsigned int, char>( 16, 'H'));
+		m.insert(std::pair<unsigned int, char>( 17, 'J'));
+		m.insert(std::pair<unsigned int, char>( 18, 'K'));
+		m.insert(std::pair<unsigned int, char>( 19, 'L'));
+		m.insert(std::pair<unsigned int, char>( 20, 'M'));
+		m.insert(std::pair<unsigned int, char>( 21, 'N'));
+		m.insert(std::pair<unsigned int, char>( 22, 'P'));
+		m.insert(std::pair<unsigned int, char>( 23, 'Q'));
+		m.insert(std::pair<unsigned int, char>( 24, 'R'));
+		m.insert(std::pair<unsigned int, char>( 25, 'S'));
+		m.insert(std::pair<unsigned int, char>( 26, 'T'));
+		m.insert(std::pair<unsigned int, char>( 27, 'U'));
+		m.insert(std::pair<unsigned int, char>( 28, 'V'));
+		m.insert(std::pair<unsigned int, char>( 29, 'W'));
+		m.insert(std::pair<unsigned int, char>( 30, 'X'));
+		m.insert(std::pair<unsigned int, char>( 31, 'Y'));
+		m.insert(std::pair<unsigned int, char>( 32, 'Z'));
+		m.insert(std::pair<unsigned int, char>( 33, 'a'));
+		m.insert(std::pair<unsigned int, char>( 34, 'b'));
+		m.insert(std::pair<unsigned int, char>( 35, 'c'));
+		m.insert(std::pair<unsigned int, char>( 36, 'd'));
+		m.insert(std::pair<unsigned int, char>( 37, 'e'));
+		m.insert(std::pair<unsigned int, char>( 38, 'f'));
+		m.insert(std::pair<unsigned int, char>( 39, 'g'));
+		m.insert(std::pair<unsigned int, char>( 40, 'h'));
+		m.insert(std::pair<unsigned int, char>( 41, 'i'));
+		m.insert(std::pair<unsigned int, char>( 42, 'j'));
+		m.insert(std::pair<unsigned int, char>( 43, 'k'));
+		m.insert(std::pair<unsigned int, char>( 44, 'm'));
+		m.insert(std::pair<unsigned int, char>( 45, 'n'));
+		m.insert(std::pair<unsigned int, char>( 46, 'o'));
+		m.insert(std::pair<unsigned int, char>( 47, 'p'));
+		m.insert(std::pair<unsigned int, char>( 48, 'q'));
+		m.insert(std::pair<unsigned int, char>( 49, 'r'));
+		m.insert(std::pair<unsigned int, char>( 50, 's'));
+		m.insert(std::pair<unsigned int, char>( 51, 't'));
+		m.insert(std::pair<unsigned int, char>( 52, 'u'));
+		m.insert(std::pair<unsigned int, char>( 53, 'v'));
+		m.insert(std::pair<unsigned int, char>( 54, 'w'));
+		m.insert(std::pair<unsigned int, char>( 55, 'x'));
+		m.insert(std::pair<unsigned int, char>( 56, 'y'));
+		m.insert(std::pair<unsigned int, char>( 57, 'z'));
 
-std::map<char, int> Base58Map::myMap = Base58Map::createBase58Map();
+		return m;
+	}
 
+	static std::map<char, unsigned int> ordinal;
+	static std::map<unsigned int, char> reverse;
+};
 
+std::map<char, unsigned int> Base58Map::ordinal = Base58Map::createBase58OrdinalMap();
+std::map<unsigned int, char> Base58Map::reverse = Base58Map::createBase58ReverseMap();
 
 /**
  * Converts a base58 string to uint256
@@ -31,11 +149,8 @@ secp256k1::uint256 Base58::toBigInt(const std::string &s)
 {
 	secp256k1::uint256 value;
 
-	for(unsigned int i = 0; i < s.length(); i++) {
-		value = value.mul(58);
-
-		int c = Base58Map::myMap[s[i]];
-		value = value.add(c);
+	for(size_t i = 0, il = s.length(); i < il; i++) {
+		value = value.mul(58).add(Base58Map::ordinal.at(s[i])); // at() throws std::out_of_range on a non-Base58 character instead of dereferencing end()
 	}
 
 	return value;
@@ -48,16 +163,17 @@ void Base58::toHash160(const std::string &s, unsigned int hash[5])
 
 	value.exportWords(words, 6, secp256k1::uint256::BigEndian);
 
-	// Extract words, ignore checksum
-	for(int i = 0; i < 5; i++) {
-		hash[i] = words[i];
-	}
+	hash[0] = words[0];
+	hash[1] = words[1];
+	hash[2] = words[2];
+	hash[3] = words[3];
+	hash[4] = words[4];
 }
 
-bool Base58::isBase58(std::string s)
+bool Base58::isBase58(const std::string &value)
 {
-	for(unsigned int i = 0; i < s.length(); i++) {
-		if(BASE58_STRING.find(s[i]) < 0) {
+	for(size_t i = 0; i < value.length(); i++) {
+		if(Base58Map::ordinal.find(value[i]) == Base58Map::ordinal.end()) {
 			return false;
 		}
 	}
@@ -65,22 +181,16 @@ bool Base58::isBase58(std::string s)
 	return true;
 }
 
-std::string Base58::toBase58(const secp256k1::uint256 &x)
+std::string Base58::toBase58(secp256k1::uint256 value)
 {
-	std::string s;
-
-	secp256k1::uint256 value = x;
-
-	while(!value.isZero()) {
-		secp256k1::uint256 digit = value.mod(58);
-		int digitInt = digit.toInt32();
-
-		s = BASE58_STRING[digitInt] + s;
+	std::string result;
 
+	while(!value.isZero()) { // emit digits until the value is exhausted: no fixed width, so no '1' padding and no truncation of large values
+		result.insert(0, 1, Base58Map::reverse.find(value.mod(58).toInt32())->second);
 		value = value.div(58);
 	}
 
-	return s;
+	return result;
 }
 
 void Base58::getMinMaxFromPrefix(const std::string &prefix, secp256k1::uint256 &minValueOut, secp256k1::uint256 &maxValueOut)
@@ -114,4 +224,4 @@ void Base58::getMinMaxFromPrefix(const std::string &prefix, secp256k1::uint256 &
 
 	minValueOut = minValue;
 	maxValueOut = maxValue;
-}
\ No newline at end of file
+}
diff --git a/AddressUtil/hash.cpp b/AddressUtil/hash.cpp
index 8650c0d..d1db5e6 100644
--- a/AddressUtil/hash.cpp
+++ b/AddressUtil/hash.cpp
@@ -13,7 +13,7 @@ bool Address::verifyAddress(std::string address)
 {
 	// Check length
 	if(address.length() > 34) {
-		false;
+		return false;
 	}
 
 	// Check encoding
@@ -185,4 +185,4 @@ void Hash::hashPublicKeyCompressed(const unsigned int *x, const unsigned int *y,
 	msg[15] = 0;
 
 	crypto::ripemd160(msg, digest);
-}
\ No newline at end of file
+}
diff --git a/BitCrack.props b/BitCrack.props
index 0f980a5..f6a0e5e 100644
--- a/BitCrack.props
+++ b/BitCrack.props
@@ -2,10 +2,10 @@
 <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
   <ImportGroup Label="PropertySheets" />
   <PropertyGroup Label="UserMacros">
-	<CUDA_INCLUDE>C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1\include</CUDA_INCLUDE>
-	<CUDA_LIB>C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1\lib\x64</CUDA_LIB>
-    <OPENCL_INCLUDE>C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1\include</OPENCL_INCLUDE>
-    <OPENCL_LIB>C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1\lib\x64</OPENCL_LIB>
+	<CUDA_INCLUDE>C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.3\include</CUDA_INCLUDE>
+	<CUDA_LIB>C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.3\lib\x64</CUDA_LIB>
+    <OPENCL_INCLUDE>C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.3\include</OPENCL_INCLUDE>
+    <OPENCL_LIB>C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.3\lib\x64</OPENCL_LIB>
   </PropertyGroup>
   <PropertyGroup />
   <ItemDefinitionGroup />
diff --git a/BitCrack.sln b/BitCrackOpenCL.sln
similarity index 60%
rename from BitCrack.sln
rename to BitCrackOpenCL.sln
index 9913b17..e048ccc 100644
--- a/BitCrack.sln
+++ b/BitCrackOpenCL.sln
@@ -1,31 +1,14 @@
 ﻿
 Microsoft Visual Studio Solution File, Format Version 12.00
-# Visual Studio 15
-VisualStudioVersion = 15.0.27703.2018
+# Visual Studio Version 16
+VisualStudioVersion = 16.0.31229.75
 MinimumVisualStudioVersion = 10.0.40219.1
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "secp256k1lib", "secp256k1lib\secp256k1lib.vcxproj", "{BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6}"
 EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CmdParse", "CmdParse\CmdParse.vcxproj", "{F7037134-28C5-4EB9-BE5D-587E79A40628}"
-EndProject
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "util", "util\util.vcxproj", "{93B89BF6-32B9-4EBA-AA44-BCFEC4836B76}"
 EndProject
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "AddressUtil", "AddressUtil\AddressUtil.vcxproj", "{34042455-D274-432D-9134-C9EA41FD1B54}"
 EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cuKeyFinder", "KeyFinder\KeyFinder.vcxproj", "{D77642A9-365C-420C-A726-469649D2927E}"
-	ProjectSection(ProjectDependencies) = postProject
-		{53EE0C03-4419-4767-A91B-7FC7D4B3D2AA} = {53EE0C03-4419-4767-A91B-7FC7D4B3D2AA}
-		{CCA3D02C-5E5A-4A24-B34B-5961DFA93946} = {CCA3D02C-5E5A-4A24-B34B-5961DFA93946}
-	EndProjectSection
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "AddrGen", "AddrGen\AddrGen.vcxproj", "{7AE5E38D-5731-404E-A4F3-229ADF981EFC}"
-	ProjectSection(ProjectDependencies) = postProject
-		{F7037134-28C5-4EB9-BE5D-587E79A40628} = {F7037134-28C5-4EB9-BE5D-587E79A40628}
-		{34042455-D274-432D-9134-C9EA41FD1B54} = {34042455-D274-432D-9134-C9EA41FD1B54}
-		{BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6} = {BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6}
-	EndProjectSection
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cudaUtil", "cudaUtil\cudaUtil.vcxproj", "{EADAAA54-E304-4656-8263-E5E688FF323D}"
-EndProject
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CryptoUtil", "CryptoUtil\CryptoUtil.vcxproj", "{CA46856A-1D1E-4F6F-A69C-6707D540BF36}"
 EndProject
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Logger", "Logger\Logger.vcxproj", "{150AF404-1F80-4A13-855B-4383C4A3326F}"
@@ -41,15 +24,6 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "KeyFinderLib", "KeyFinderLi
 		{BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6} = {BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6}
 	EndProjectSection
 EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cudaInfo", "cudaInfo\cudaInfo.vcxproj", "{9E8ECC85-AF9F-4F17-9397-633CA2FEE94E}"
-	ProjectSection(ProjectDependencies) = postProject
-		{EADAAA54-E304-4656-8263-E5E688FF323D} = {EADAAA54-E304-4656-8263-E5E688FF323D}
-	EndProjectSection
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cudaMath", "cudaMath\cudaMath.vcxproj", "{E1BDB205-8994-4E49-8B35-172A84E7118C}"
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CudaKeySearchDevice", "CudaKeySearchDevice\CudaKeySearchDevice.vcxproj", "{CCA3D02C-5E5A-4A24-B34B-5961DFA93946}"
-EndProject
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "clUtil", "clUtil\clUtil.vcxproj", "{D9A5823D-C472-40AC-B23A-21B1586CEEB0}"
 	ProjectSection(ProjectDependencies) = postProject
 		{93B89BF6-32B9-4EBA-AA44-BCFEC4836B76} = {93B89BF6-32B9-4EBA-AA44-BCFEC4836B76}
@@ -62,27 +36,22 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "clKeyFinder", "clKeyFinder\
 		{53EE0C03-4419-4767-A91B-7FC7D4B3D2AA} = {53EE0C03-4419-4767-A91B-7FC7D4B3D2AA}
 		{150AF404-1F80-4A13-855B-4383C4A3326F} = {150AF404-1F80-4A13-855B-4383C4A3326F}
 		{546C8D1F-127F-4EF4-914F-2A7F9367C0F9} = {546C8D1F-127F-4EF4-914F-2A7F9367C0F9}
-		{F7037134-28C5-4EB9-BE5D-587E79A40628} = {F7037134-28C5-4EB9-BE5D-587E79A40628}
+		{83327841-C283-4D46-A873-97AC674C68AC} = {83327841-C283-4D46-A873-97AC674C68AC}
 		{34042455-D274-432D-9134-C9EA41FD1B54} = {34042455-D274-432D-9134-C9EA41FD1B54}
 		{93B89BF6-32B9-4EBA-AA44-BCFEC4836B76} = {93B89BF6-32B9-4EBA-AA44-BCFEC4836B76}
 		{BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6} = {BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6}
 	EndProjectSection
 EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "embedcl", "embedcl\embedcl.vcxproj", "{8DA841AA-42FF-40AA-8F12-BC654DF39FEF}"
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CLUnitTests", "CLUnitTests\CLUnitTests.vcxproj", "{146C79F4-2CA1-43B8-A8FE-C86C4E9F6C63}"
-	ProjectSection(ProjectDependencies) = postProject
-		{D9A5823D-C472-40AC-B23A-21B1586CEEB0} = {D9A5823D-C472-40AC-B23A-21B1586CEEB0}
-		{83327841-C283-4D46-A873-97AC674C68AC} = {83327841-C283-4D46-A873-97AC674C68AC}
-		{93B89BF6-32B9-4EBA-AA44-BCFEC4836B76} = {93B89BF6-32B9-4EBA-AA44-BCFEC4836B76}
-	EndProjectSection
-EndProject
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "clMath", "clMath\clMath.vcxproj", "{83327841-C283-4D46-A873-97AC674C68AC}"
 EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CmdParse", "CmdParse\CmdParse.vcxproj", "{F7037134-28C5-4EB9-BE5D-587E79A40628}"
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|x64 = Debug|x64
 		Debug|x86 = Debug|x86
+		Performance Release|x64 = Performance Release|x64
+		Performance Release|x86 = Performance Release|x86
 		Release|x64 = Release|x64
 		Release|x86 = Release|x86
 	EndGlobalSection
@@ -91,22 +60,20 @@ Global
 		{BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6}.Debug|x64.Build.0 = Debug|x64
 		{BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6}.Debug|x86.ActiveCfg = Debug|Win32
 		{BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6}.Debug|x86.Build.0 = Debug|Win32
+		{BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6}.Performance Release|x64.ActiveCfg = Performance Release|x64
+		{BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6}.Performance Release|x64.Build.0 = Performance Release|x64
+		{BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6}.Performance Release|x86.ActiveCfg = Performance Release|Win32
 		{BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6}.Release|x64.ActiveCfg = Release|x64
 		{BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6}.Release|x64.Build.0 = Release|x64
 		{BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6}.Release|x86.ActiveCfg = Release|Win32
 		{BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6}.Release|x86.Build.0 = Release|Win32
-		{F7037134-28C5-4EB9-BE5D-587E79A40628}.Debug|x64.ActiveCfg = Debug|x64
-		{F7037134-28C5-4EB9-BE5D-587E79A40628}.Debug|x64.Build.0 = Debug|x64
-		{F7037134-28C5-4EB9-BE5D-587E79A40628}.Debug|x86.ActiveCfg = Debug|Win32
-		{F7037134-28C5-4EB9-BE5D-587E79A40628}.Debug|x86.Build.0 = Debug|Win32
-		{F7037134-28C5-4EB9-BE5D-587E79A40628}.Release|x64.ActiveCfg = Release|x64
-		{F7037134-28C5-4EB9-BE5D-587E79A40628}.Release|x64.Build.0 = Release|x64
-		{F7037134-28C5-4EB9-BE5D-587E79A40628}.Release|x86.ActiveCfg = Release|Win32
-		{F7037134-28C5-4EB9-BE5D-587E79A40628}.Release|x86.Build.0 = Release|Win32
 		{93B89BF6-32B9-4EBA-AA44-BCFEC4836B76}.Debug|x64.ActiveCfg = Debug|x64
 		{93B89BF6-32B9-4EBA-AA44-BCFEC4836B76}.Debug|x64.Build.0 = Debug|x64
 		{93B89BF6-32B9-4EBA-AA44-BCFEC4836B76}.Debug|x86.ActiveCfg = Debug|Win32
 		{93B89BF6-32B9-4EBA-AA44-BCFEC4836B76}.Debug|x86.Build.0 = Debug|Win32
+		{93B89BF6-32B9-4EBA-AA44-BCFEC4836B76}.Performance Release|x64.ActiveCfg = Performance Release|x64
+		{93B89BF6-32B9-4EBA-AA44-BCFEC4836B76}.Performance Release|x64.Build.0 = Performance Release|x64
+		{93B89BF6-32B9-4EBA-AA44-BCFEC4836B76}.Performance Release|x86.ActiveCfg = Performance Release|Win32
 		{93B89BF6-32B9-4EBA-AA44-BCFEC4836B76}.Release|x64.ActiveCfg = Release|x64
 		{93B89BF6-32B9-4EBA-AA44-BCFEC4836B76}.Release|x64.Build.0 = Release|x64
 		{93B89BF6-32B9-4EBA-AA44-BCFEC4836B76}.Release|x86.ActiveCfg = Release|Win32
@@ -115,38 +82,20 @@ Global
 		{34042455-D274-432D-9134-C9EA41FD1B54}.Debug|x64.Build.0 = Debug|x64
 		{34042455-D274-432D-9134-C9EA41FD1B54}.Debug|x86.ActiveCfg = Debug|Win32
 		{34042455-D274-432D-9134-C9EA41FD1B54}.Debug|x86.Build.0 = Debug|Win32
+		{34042455-D274-432D-9134-C9EA41FD1B54}.Performance Release|x64.ActiveCfg = Performance Release|x64
+		{34042455-D274-432D-9134-C9EA41FD1B54}.Performance Release|x64.Build.0 = Performance Release|x64
+		{34042455-D274-432D-9134-C9EA41FD1B54}.Performance Release|x86.ActiveCfg = Performance Release|Win32
 		{34042455-D274-432D-9134-C9EA41FD1B54}.Release|x64.ActiveCfg = Release|x64
 		{34042455-D274-432D-9134-C9EA41FD1B54}.Release|x64.Build.0 = Release|x64
 		{34042455-D274-432D-9134-C9EA41FD1B54}.Release|x86.ActiveCfg = Release|Win32
 		{34042455-D274-432D-9134-C9EA41FD1B54}.Release|x86.Build.0 = Release|Win32
-		{D77642A9-365C-420C-A726-469649D2927E}.Debug|x64.ActiveCfg = Debug|x64
-		{D77642A9-365C-420C-A726-469649D2927E}.Debug|x64.Build.0 = Debug|x64
-		{D77642A9-365C-420C-A726-469649D2927E}.Debug|x86.ActiveCfg = Debug|Win32
-		{D77642A9-365C-420C-A726-469649D2927E}.Debug|x86.Build.0 = Debug|Win32
-		{D77642A9-365C-420C-A726-469649D2927E}.Release|x64.ActiveCfg = Release|x64
-		{D77642A9-365C-420C-A726-469649D2927E}.Release|x64.Build.0 = Release|x64
-		{D77642A9-365C-420C-A726-469649D2927E}.Release|x86.ActiveCfg = Release|Win32
-		{D77642A9-365C-420C-A726-469649D2927E}.Release|x86.Build.0 = Release|Win32
-		{7AE5E38D-5731-404E-A4F3-229ADF981EFC}.Debug|x64.ActiveCfg = Debug|x64
-		{7AE5E38D-5731-404E-A4F3-229ADF981EFC}.Debug|x64.Build.0 = Debug|x64
-		{7AE5E38D-5731-404E-A4F3-229ADF981EFC}.Debug|x86.ActiveCfg = Debug|Win32
-		{7AE5E38D-5731-404E-A4F3-229ADF981EFC}.Debug|x86.Build.0 = Debug|Win32
-		{7AE5E38D-5731-404E-A4F3-229ADF981EFC}.Release|x64.ActiveCfg = Release|x64
-		{7AE5E38D-5731-404E-A4F3-229ADF981EFC}.Release|x64.Build.0 = Release|x64
-		{7AE5E38D-5731-404E-A4F3-229ADF981EFC}.Release|x86.ActiveCfg = Release|Win32
-		{7AE5E38D-5731-404E-A4F3-229ADF981EFC}.Release|x86.Build.0 = Release|Win32
-		{EADAAA54-E304-4656-8263-E5E688FF323D}.Debug|x64.ActiveCfg = Debug|x64
-		{EADAAA54-E304-4656-8263-E5E688FF323D}.Debug|x64.Build.0 = Debug|x64
-		{EADAAA54-E304-4656-8263-E5E688FF323D}.Debug|x86.ActiveCfg = Debug|Win32
-		{EADAAA54-E304-4656-8263-E5E688FF323D}.Debug|x86.Build.0 = Debug|Win32
-		{EADAAA54-E304-4656-8263-E5E688FF323D}.Release|x64.ActiveCfg = Release|x64
-		{EADAAA54-E304-4656-8263-E5E688FF323D}.Release|x64.Build.0 = Release|x64
-		{EADAAA54-E304-4656-8263-E5E688FF323D}.Release|x86.ActiveCfg = Release|Win32
-		{EADAAA54-E304-4656-8263-E5E688FF323D}.Release|x86.Build.0 = Release|Win32
 		{CA46856A-1D1E-4F6F-A69C-6707D540BF36}.Debug|x64.ActiveCfg = Debug|x64
 		{CA46856A-1D1E-4F6F-A69C-6707D540BF36}.Debug|x64.Build.0 = Debug|x64
 		{CA46856A-1D1E-4F6F-A69C-6707D540BF36}.Debug|x86.ActiveCfg = Debug|Win32
 		{CA46856A-1D1E-4F6F-A69C-6707D540BF36}.Debug|x86.Build.0 = Debug|Win32
+		{CA46856A-1D1E-4F6F-A69C-6707D540BF36}.Performance Release|x64.ActiveCfg = Performance Release|x64
+		{CA46856A-1D1E-4F6F-A69C-6707D540BF36}.Performance Release|x64.Build.0 = Performance Release|x64
+		{CA46856A-1D1E-4F6F-A69C-6707D540BF36}.Performance Release|x86.ActiveCfg = Performance Release|Win32
 		{CA46856A-1D1E-4F6F-A69C-6707D540BF36}.Release|x64.ActiveCfg = Release|x64
 		{CA46856A-1D1E-4F6F-A69C-6707D540BF36}.Release|x64.Build.0 = Release|x64
 		{CA46856A-1D1E-4F6F-A69C-6707D540BF36}.Release|x86.ActiveCfg = Release|Win32
@@ -155,6 +104,9 @@ Global
 		{150AF404-1F80-4A13-855B-4383C4A3326F}.Debug|x64.Build.0 = Debug|x64
 		{150AF404-1F80-4A13-855B-4383C4A3326F}.Debug|x86.ActiveCfg = Debug|Win32
 		{150AF404-1F80-4A13-855B-4383C4A3326F}.Debug|x86.Build.0 = Debug|Win32
+		{150AF404-1F80-4A13-855B-4383C4A3326F}.Performance Release|x64.ActiveCfg = Performance Release|x64
+		{150AF404-1F80-4A13-855B-4383C4A3326F}.Performance Release|x64.Build.0 = Performance Release|x64
+		{150AF404-1F80-4A13-855B-4383C4A3326F}.Performance Release|x86.ActiveCfg = Performance Release|Win32
 		{150AF404-1F80-4A13-855B-4383C4A3326F}.Release|x64.ActiveCfg = Release|x64
 		{150AF404-1F80-4A13-855B-4383C4A3326F}.Release|x64.Build.0 = Release|x64
 		{150AF404-1F80-4A13-855B-4383C4A3326F}.Release|x86.ActiveCfg = Release|Win32
@@ -162,31 +114,19 @@ Global
 		{53EE0C03-4419-4767-A91B-7FC7D4B3D2AA}.Debug|x64.ActiveCfg = Debug|x64
 		{53EE0C03-4419-4767-A91B-7FC7D4B3D2AA}.Debug|x64.Build.0 = Debug|x64
 		{53EE0C03-4419-4767-A91B-7FC7D4B3D2AA}.Debug|x86.ActiveCfg = Debug|x64
+		{53EE0C03-4419-4767-A91B-7FC7D4B3D2AA}.Performance Release|x64.ActiveCfg = Performance Release|x64
+		{53EE0C03-4419-4767-A91B-7FC7D4B3D2AA}.Performance Release|x64.Build.0 = Performance Release|x64
+		{53EE0C03-4419-4767-A91B-7FC7D4B3D2AA}.Performance Release|x86.ActiveCfg = Performance Release|x64
 		{53EE0C03-4419-4767-A91B-7FC7D4B3D2AA}.Release|x64.ActiveCfg = Release|x64
 		{53EE0C03-4419-4767-A91B-7FC7D4B3D2AA}.Release|x64.Build.0 = Release|x64
 		{53EE0C03-4419-4767-A91B-7FC7D4B3D2AA}.Release|x86.ActiveCfg = Release|x64
-		{9E8ECC85-AF9F-4F17-9397-633CA2FEE94E}.Debug|x64.ActiveCfg = Debug|x64
-		{9E8ECC85-AF9F-4F17-9397-633CA2FEE94E}.Debug|x64.Build.0 = Debug|x64
-		{9E8ECC85-AF9F-4F17-9397-633CA2FEE94E}.Debug|x86.ActiveCfg = Debug|x64
-		{9E8ECC85-AF9F-4F17-9397-633CA2FEE94E}.Release|x64.ActiveCfg = Release|x64
-		{9E8ECC85-AF9F-4F17-9397-633CA2FEE94E}.Release|x64.Build.0 = Release|x64
-		{9E8ECC85-AF9F-4F17-9397-633CA2FEE94E}.Release|x86.ActiveCfg = Release|x64
-		{E1BDB205-8994-4E49-8B35-172A84E7118C}.Debug|x64.ActiveCfg = Debug|x64
-		{E1BDB205-8994-4E49-8B35-172A84E7118C}.Debug|x64.Build.0 = Debug|x64
-		{E1BDB205-8994-4E49-8B35-172A84E7118C}.Debug|x86.ActiveCfg = Debug|x64
-		{E1BDB205-8994-4E49-8B35-172A84E7118C}.Release|x64.ActiveCfg = Release|x64
-		{E1BDB205-8994-4E49-8B35-172A84E7118C}.Release|x64.Build.0 = Release|x64
-		{E1BDB205-8994-4E49-8B35-172A84E7118C}.Release|x86.ActiveCfg = Release|x64
-		{CCA3D02C-5E5A-4A24-B34B-5961DFA93946}.Debug|x64.ActiveCfg = Debug|x64
-		{CCA3D02C-5E5A-4A24-B34B-5961DFA93946}.Debug|x64.Build.0 = Debug|x64
-		{CCA3D02C-5E5A-4A24-B34B-5961DFA93946}.Debug|x86.ActiveCfg = Debug|x64
-		{CCA3D02C-5E5A-4A24-B34B-5961DFA93946}.Release|x64.ActiveCfg = Release|x64
-		{CCA3D02C-5E5A-4A24-B34B-5961DFA93946}.Release|x64.Build.0 = Release|x64
-		{CCA3D02C-5E5A-4A24-B34B-5961DFA93946}.Release|x86.ActiveCfg = Release|x64
 		{D9A5823D-C472-40AC-B23A-21B1586CEEB0}.Debug|x64.ActiveCfg = Debug|x64
 		{D9A5823D-C472-40AC-B23A-21B1586CEEB0}.Debug|x64.Build.0 = Debug|x64
 		{D9A5823D-C472-40AC-B23A-21B1586CEEB0}.Debug|x86.ActiveCfg = Debug|Win32
 		{D9A5823D-C472-40AC-B23A-21B1586CEEB0}.Debug|x86.Build.0 = Debug|Win32
+		{D9A5823D-C472-40AC-B23A-21B1586CEEB0}.Performance Release|x64.ActiveCfg = Performance Release|x64
+		{D9A5823D-C472-40AC-B23A-21B1586CEEB0}.Performance Release|x64.Build.0 = Performance Release|x64
+		{D9A5823D-C472-40AC-B23A-21B1586CEEB0}.Performance Release|x86.ActiveCfg = Performance Release|Win32
 		{D9A5823D-C472-40AC-B23A-21B1586CEEB0}.Release|x64.ActiveCfg = Release|x64
 		{D9A5823D-C472-40AC-B23A-21B1586CEEB0}.Release|x64.Build.0 = Release|x64
 		{D9A5823D-C472-40AC-B23A-21B1586CEEB0}.Release|x86.ActiveCfg = Release|Win32
@@ -195,6 +135,9 @@ Global
 		{546C8D1F-127F-4EF4-914F-2A7F9367C0F9}.Debug|x64.Build.0 = Debug|x64
 		{546C8D1F-127F-4EF4-914F-2A7F9367C0F9}.Debug|x86.ActiveCfg = Debug|Win32
 		{546C8D1F-127F-4EF4-914F-2A7F9367C0F9}.Debug|x86.Build.0 = Debug|Win32
+		{546C8D1F-127F-4EF4-914F-2A7F9367C0F9}.Performance Release|x64.ActiveCfg = Performance Release|x64
+		{546C8D1F-127F-4EF4-914F-2A7F9367C0F9}.Performance Release|x64.Build.0 = Performance Release|x64
+		{546C8D1F-127F-4EF4-914F-2A7F9367C0F9}.Performance Release|x86.ActiveCfg = Performance Release|Win32
 		{546C8D1F-127F-4EF4-914F-2A7F9367C0F9}.Release|x64.ActiveCfg = Release|x64
 		{546C8D1F-127F-4EF4-914F-2A7F9367C0F9}.Release|x64.Build.0 = Release|x64
 		{546C8D1F-127F-4EF4-914F-2A7F9367C0F9}.Release|x86.ActiveCfg = Release|Win32
@@ -203,34 +146,35 @@ Global
 		{36400E8D-3D04-430C-90A4-FC989E460B3C}.Debug|x64.Build.0 = Debug|x64
 		{36400E8D-3D04-430C-90A4-FC989E460B3C}.Debug|x86.ActiveCfg = Debug|Win32
 		{36400E8D-3D04-430C-90A4-FC989E460B3C}.Debug|x86.Build.0 = Debug|Win32
+		{36400E8D-3D04-430C-90A4-FC989E460B3C}.Performance Release|x64.ActiveCfg = Performance Release|x64
+		{36400E8D-3D04-430C-90A4-FC989E460B3C}.Performance Release|x64.Build.0 = Performance Release|x64
+		{36400E8D-3D04-430C-90A4-FC989E460B3C}.Performance Release|x86.ActiveCfg = Performance Release|Win32
 		{36400E8D-3D04-430C-90A4-FC989E460B3C}.Release|x64.ActiveCfg = Release|x64
 		{36400E8D-3D04-430C-90A4-FC989E460B3C}.Release|x64.Build.0 = Release|x64
 		{36400E8D-3D04-430C-90A4-FC989E460B3C}.Release|x86.ActiveCfg = Release|Win32
 		{36400E8D-3D04-430C-90A4-FC989E460B3C}.Release|x86.Build.0 = Release|Win32
-		{8DA841AA-42FF-40AA-8F12-BC654DF39FEF}.Debug|x64.ActiveCfg = Debug|x64
-		{8DA841AA-42FF-40AA-8F12-BC654DF39FEF}.Debug|x64.Build.0 = Debug|x64
-		{8DA841AA-42FF-40AA-8F12-BC654DF39FEF}.Debug|x86.ActiveCfg = Debug|Win32
-		{8DA841AA-42FF-40AA-8F12-BC654DF39FEF}.Debug|x86.Build.0 = Debug|Win32
-		{8DA841AA-42FF-40AA-8F12-BC654DF39FEF}.Release|x64.ActiveCfg = Release|x64
-		{8DA841AA-42FF-40AA-8F12-BC654DF39FEF}.Release|x64.Build.0 = Release|x64
-		{8DA841AA-42FF-40AA-8F12-BC654DF39FEF}.Release|x86.ActiveCfg = Release|Win32
-		{8DA841AA-42FF-40AA-8F12-BC654DF39FEF}.Release|x86.Build.0 = Release|Win32
-		{146C79F4-2CA1-43B8-A8FE-C86C4E9F6C63}.Debug|x64.ActiveCfg = Debug|x64
-		{146C79F4-2CA1-43B8-A8FE-C86C4E9F6C63}.Debug|x64.Build.0 = Debug|x64
-		{146C79F4-2CA1-43B8-A8FE-C86C4E9F6C63}.Debug|x86.ActiveCfg = Debug|Win32
-		{146C79F4-2CA1-43B8-A8FE-C86C4E9F6C63}.Debug|x86.Build.0 = Debug|Win32
-		{146C79F4-2CA1-43B8-A8FE-C86C4E9F6C63}.Release|x64.ActiveCfg = Release|x64
-		{146C79F4-2CA1-43B8-A8FE-C86C4E9F6C63}.Release|x64.Build.0 = Release|x64
-		{146C79F4-2CA1-43B8-A8FE-C86C4E9F6C63}.Release|x86.ActiveCfg = Release|Win32
-		{146C79F4-2CA1-43B8-A8FE-C86C4E9F6C63}.Release|x86.Build.0 = Release|Win32
 		{83327841-C283-4D46-A873-97AC674C68AC}.Debug|x64.ActiveCfg = Debug|x64
 		{83327841-C283-4D46-A873-97AC674C68AC}.Debug|x64.Build.0 = Debug|x64
 		{83327841-C283-4D46-A873-97AC674C68AC}.Debug|x86.ActiveCfg = Debug|Win32
 		{83327841-C283-4D46-A873-97AC674C68AC}.Debug|x86.Build.0 = Debug|Win32
+		{83327841-C283-4D46-A873-97AC674C68AC}.Performance Release|x64.ActiveCfg = Performance Release|x64
+		{83327841-C283-4D46-A873-97AC674C68AC}.Performance Release|x64.Build.0 = Performance Release|x64
+		{83327841-C283-4D46-A873-97AC674C68AC}.Performance Release|x86.ActiveCfg = Performance Release|Win32
 		{83327841-C283-4D46-A873-97AC674C68AC}.Release|x64.ActiveCfg = Release|x64
 		{83327841-C283-4D46-A873-97AC674C68AC}.Release|x64.Build.0 = Release|x64
 		{83327841-C283-4D46-A873-97AC674C68AC}.Release|x86.ActiveCfg = Release|Win32
 		{83327841-C283-4D46-A873-97AC674C68AC}.Release|x86.Build.0 = Release|Win32
+		{F7037134-28C5-4EB9-BE5D-587E79A40628}.Debug|x64.ActiveCfg = Debug|x64
+		{F7037134-28C5-4EB9-BE5D-587E79A40628}.Debug|x64.Build.0 = Debug|x64
+		{F7037134-28C5-4EB9-BE5D-587E79A40628}.Debug|x86.ActiveCfg = Debug|Win32
+		{F7037134-28C5-4EB9-BE5D-587E79A40628}.Debug|x86.Build.0 = Debug|Win32
+		{F7037134-28C5-4EB9-BE5D-587E79A40628}.Performance Release|x64.ActiveCfg = Performance Release|x64
+		{F7037134-28C5-4EB9-BE5D-587E79A40628}.Performance Release|x64.Build.0 = Performance Release|x64
+		{F7037134-28C5-4EB9-BE5D-587E79A40628}.Performance Release|x86.ActiveCfg = Performance Release|Win32
+		{F7037134-28C5-4EB9-BE5D-587E79A40628}.Release|x64.ActiveCfg = Release|x64
+		{F7037134-28C5-4EB9-BE5D-587E79A40628}.Release|x64.Build.0 = Release|x64
+		{F7037134-28C5-4EB9-BE5D-587E79A40628}.Release|x86.ActiveCfg = Release|Win32
+		{F7037134-28C5-4EB9-BE5D-587E79A40628}.Release|x86.Build.0 = Release|Win32
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
diff --git a/CLKeySearchDevice/CLKeySearchDevice.cpp b/CLKeySearchDevice/CLKeySearchDevice.cpp
index 4035e64..5482a83 100644
--- a/CLKeySearchDevice/CLKeySearchDevice.cpp
+++ b/CLKeySearchDevice/CLKeySearchDevice.cpp
@@ -14,7 +14,6 @@ typedef struct {
     unsigned int digest[5];
 }CLDeviceResult;
 
-
 static void undoRMD160FinalRound(const unsigned int hIn[5], unsigned int hOut[5])
 {
     unsigned int iv[5] = {
@@ -30,40 +29,49 @@ static void undoRMD160FinalRound(const unsigned int hIn[5], unsigned int hOut[5]
     }
 }
 
-CLKeySearchDevice::CLKeySearchDevice(uint64_t device, int threads, int pointsPerThread, int blocks)
+CLKeySearchDevice::CLKeySearchDevice(uint64_t device, int threads, int pointsPerThread, int blocks, int compressionMode)
 {
     _threads = threads;
     _blocks = blocks;
     _points = pointsPerThread * threads * blocks;
     _device = (cl_device_id)device;
 
-
     if(threads <= 0 || threads % 32 != 0) {
-        throw KeySearchException("The number of threads must be a multiple of 32");
+        throw KeySearchException("KEYSEARCH_THREAD_MULTIPLE_EXCEPTION", "The number of threads must be a multiple of 32");
     }
 
     if(pointsPerThread <= 0) {
-        throw KeySearchException("At least 1 point per thread required");
+        throw KeySearchException("KEYSEARCH_MINIMUM_POINT_EXCEPTION", "At least 1 point per thread required");
     }
 
+    std::string options = "";
+
+    switch (compressionMode) {
+        case PointCompressionType::COMPRESSED:
+            options += " -DCOMPRESSION_COMPRESSED";
+        break;
+        case PointCompressionType::UNCOMPRESSED:
+            options += " -DCOMPRESSION_UNCOMPRESSED";
+        break;
+        case PointCompressionType::BOTH:
+            options += " -DCOMPRESSION_BOTH";
+        break;
+    }
     try {
         // Create the context
         _clContext = new cl::CLContext(_device);
         Logger::log(LogLevel::Info, "Compiling OpenCL kernels...");
-        _clProgram = new cl::CLProgram(*_clContext, _bitcrack_cl);
+        _clProgram = new cl::CLProgram(*_clContext, _bitcrack_cl, options);
 
         // Load the kernels
-        _initKeysKernel = new cl::CLKernel(*_clProgram, "multiplyStepKernel");
-        _initKeysKernel->getWorkGroupSize();
-
-        _stepKernel = new cl::CLKernel(*_clProgram, "keyFinderKernel");
-        _stepKernelWithDouble = new cl::CLKernel(*_clProgram, "keyFinderKernelWithDouble");
+        _initKeysKernel = new cl::CLKernel(*_clProgram, "_initKeysKernel");
+        _stepKernel = new cl::CLKernel(*_clProgram, "_stepKernel");
 
         _globalMemSize = _clContext->getGlobalMemorySize();
 
         _deviceName = _clContext->getDeviceName();
     } catch(cl::CLException ex) {
-        throw KeySearchException(ex.msg);
+        throw KeySearchException(ex.msg, ex.description);
     }
 
     _iterations = 0;
@@ -81,7 +89,6 @@ CLKeySearchDevice::~CLKeySearchDevice()
     _clContext->free(_deviceResultsCount);
 
     delete _stepKernel;
-    delete _stepKernelWithDouble;
     delete _initKeysKernel;
     delete _clContext;
 }
@@ -98,6 +105,9 @@ uint64_t CLKeySearchDevice::getOptimalBloomFilterMask(double p, size_t n)
 void CLKeySearchDevice::initializeBloomFilter(const std::vector<struct hash160> &targets, uint64_t mask)
 {
     size_t sizeInWords = (mask + 1) / 32;
+    _targetMemSize = sizeInWords * sizeof(uint32_t);
+
+    Logger::log(LogLevel::Info, "Initializing BloomFilter (" + util::format("%.1f", (double)_targetMemSize / (double)(1024 * 1024)) + "MB)");
 
     uint32_t *buf = new uint32_t[sizeInWords];
 
@@ -130,13 +140,10 @@ void CLKeySearchDevice::initializeBloomFilter(const std::vector<struct hash160>
         }
     }
 
-
-    _targetMemSize = sizeInWords * sizeof(uint32_t);
-
     _deviceTargetList.mask = mask;
-    _deviceTargetList.ptr = _clContext->malloc(sizeInWords * sizeof(uint32_t));
+    _deviceTargetList.ptr = _clContext->malloc(_targetMemSize);
     _deviceTargetList.size = targets.size();
-    _clContext->copyHostToDevice(buf, _deviceTargetList.ptr, sizeInWords * sizeof(uint32_t));
+    _clContext->copyHostToDevice(buf, _deviceTargetList.ptr, _targetMemSize);
 
     delete[] buf;
 }
@@ -146,6 +153,20 @@ void CLKeySearchDevice::allocateBuffers()
     size_t numKeys = (size_t)_points;
     size_t size = numKeys * 8 * sizeof(unsigned int);
 
+    _bufferMemSize = 
+        size +                           // _x
+        size +                           // _y
+        size +                           // _chain
+        size +                           // _privateKeys
+        256 * 8 * sizeof(unsigned int) + // _xTable
+        256 * 8 * sizeof(unsigned int) + // _yTable
+        8 * sizeof(unsigned int) +       // _xInc
+        8 * sizeof(unsigned int) +       // _yInc
+        128 * sizeof(CLDeviceResult) +   // _deviceResults
+        sizeof(unsigned int);            // _deviceResultsCount
+
+    Logger::log(LogLevel::Info, "Allocating Memory for Buffers (" + util::format("%.1f", (double)_bufferMemSize / (double)(1024 * 1024)) + "MB)");
+
     // X values
     _x = _clContext->malloc(size);
     _clContext->memset(_x, -1, size);
@@ -187,7 +208,7 @@ void CLKeySearchDevice::setIncrementor(secp256k1::ecpoint &p)
 void CLKeySearchDevice::init(const secp256k1::uint256 &start, int compression, const secp256k1::uint256 &stride)
 {
     if(start.cmp(secp256k1::N) >= 0) {
-        throw KeySearchException("Starting key is out of range");
+        throw KeySearchException("KEYSEARCH_STARTINGKEY_OUT_OF_RANGE", "Starting key is out of range");
     }
 
     _start = start;
@@ -202,80 +223,36 @@ void CLKeySearchDevice::init(const secp256k1::uint256 &start, int compression, c
         generateStartingPoints();
 
         // Set the incrementor
-        secp256k1::ecpoint g = secp256k1::G();
-        secp256k1::ecpoint p = secp256k1::multiplyPoint(secp256k1::uint256((uint64_t)_points ) * _stride, g);
-
+        secp256k1::ecpoint p = secp256k1::multiplyPoint(secp256k1::uint256((uint64_t)_points ) * _stride, secp256k1::G());
         setIncrementor(p);
     } catch(cl::CLException ex) {
-        throw KeySearchException(ex.msg);
+        throw KeySearchException(ex.msg, ex.description);
     }
 }
 
 void CLKeySearchDevice::doStep()
 {
     try {
-        uint64_t numKeys = (uint64_t)_points;
-
-        if(_iterations < 2 && _start.cmp(numKeys) <= 0) {
-
-            _stepKernelWithDouble->set_args(
-                _points,
-                _compression,
-                _chain,
-                _x,
-                _y,
-                _xInc,
-                _yInc,
-                _deviceTargetList.ptr,
-                _deviceTargetList.size,
-                _deviceTargetList.mask,
-                _deviceResults,
-                _deviceResultsCount);
-            _stepKernelWithDouble->call(_blocks, _threads);
-        } else {
-
-            _stepKernel->set_args(
-                _points,
-                _compression,
-                _chain,
-                _x,
-                _y,
-                _xInc,
-                _yInc,
-                _deviceTargetList.ptr,
-                _deviceTargetList.size,
-                _deviceTargetList.mask,
-                _deviceResults,
-                _deviceResultsCount);
-            _stepKernel->call(_blocks, _threads);
-        }
+        _stepKernel->set_args(
+            _points,
+            _chain,
+            _x,
+            _y,
+            _xInc,
+            _yInc,
+            _deviceTargetList.ptr,
+            _deviceTargetList.mask,
+            _deviceResults,
+            _deviceResultsCount);
+        _stepKernel->call(_blocks, _threads);
         fflush(stdout);
 
         getResultsInternal();
 
         _iterations++;
     } catch(cl::CLException ex) {
-        throw KeySearchException(ex.msg);
-    }
-}
-
-void CLKeySearchDevice::setTargetsList()
-{
-    size_t count = _targetList.size();
-
-    _targets = _clContext->malloc(5 * sizeof(unsigned int) * count);
-
-    for(size_t i = 0; i < count; i++) {
-        unsigned int h[5];
-
-        undoRMD160FinalRound(_targetList[i].h, h);
-
-        _clContext->copyHostToDevice(h, _targets, i * 5 * sizeof(unsigned int), 5 * sizeof(unsigned int));
+        throw KeySearchException(ex.msg, ex.description);
     }
-
-    _targetMemSize = count * 5 * sizeof(unsigned int);
-    _deviceTargetList.ptr = _targets;
-    _deviceTargetList.size = count;
 }
 
 void CLKeySearchDevice::setBloomFilter()
@@ -292,11 +269,7 @@ void CLKeySearchDevice::setTargetsInternal()
         _clContext->free(_deviceTargetList.ptr);
     }
 
-    if(_targetList.size() < 16) {
-        setTargetsList();
-    } else {
-        setBloomFilter();
-    }
+    setBloomFilter();
 }
 
 void CLKeySearchDevice::setTargets(const std::set<KeySearchTarget> &targets)
@@ -311,7 +284,7 @@ void CLKeySearchDevice::setTargets(const std::set<KeySearchTarget> &targets)
 
         setTargetsInternal();
     } catch(cl::CLException ex) {
-        throw KeySearchException(ex.msg);
+        throw KeySearchException(ex.msg, ex.description);
     }
 }
 
@@ -338,7 +311,7 @@ std::string CLKeySearchDevice::getDeviceName()
 
 void CLKeySearchDevice::getMemoryInfo(uint64_t &freeMem, uint64_t &totalMem)
 {
-    freeMem = _globalMemSize - _targetMemSize - _pointsMemSize;
+    freeMem = _globalMemSize - _targetMemSize - _bufferMemSize; // _bufferMemSize (allocateBuffers) already counts _x/_y/_chain; also subtracting _pointsMemSize would count the per-point buffers twice
     totalMem = _globalMemSize;
 }
 
@@ -379,7 +352,6 @@ void CLKeySearchDevice::removeTargetFromList(const unsigned int hash[5])
     }
 }
 
-
 void CLKeySearchDevice::getResultsInternal()
 {
     unsigned int numResults = 0;
@@ -418,6 +390,8 @@ void CLKeySearchDevice::getResultsInternal()
 
             _results.push_back(minerResult);
         }
+        
+        delete[] ptr;
 
         // Reset device counter
         numResults = 0;
@@ -425,52 +399,6 @@ void CLKeySearchDevice::getResultsInternal()
     }
 }
 
-void CLKeySearchDevice::selfTest()
-{
-    uint64_t numPoints = (uint64_t)_points;
-    std::vector<secp256k1::uint256> privateKeys;
-
-    // Generate key pairs for k, k+1, k+2 ... k + <total points in parallel - 1>
-    secp256k1::uint256 privKey = _start;
-
-    privateKeys.push_back(_start);
-
-    for(uint64_t i = 1; i < numPoints; i++) {
-        privKey = privKey.add(_stride);
-        privateKeys.push_back(privKey);
-    }
-
-    unsigned int *xBuf = new unsigned int[numPoints * 8];
-    unsigned int *yBuf = new unsigned int[numPoints * 8];
-
-    _clContext->copyDeviceToHost(_x, xBuf, sizeof(unsigned int) * 8 * numPoints);
-    _clContext->copyDeviceToHost(_y, yBuf, sizeof(unsigned int) * 8 * numPoints);
-
-    for(int index = 0; index < _points; index++) {
-        secp256k1::uint256 privateKey = privateKeys[index];
-
-        secp256k1::uint256 x = readBigInt(xBuf, index);
-        secp256k1::uint256 y = readBigInt(yBuf, index);
-
-        secp256k1::ecpoint p1(x, y);
-        secp256k1::ecpoint p2 = secp256k1::multiplyPoint(privateKey, secp256k1::G());
-
-        if(!secp256k1::pointExists(p1)) {
-            throw std::string("Validation failed: invalid point");
-        }
-
-        if(!secp256k1::pointExists(p2)) {
-            throw std::string("Validation failed: invalid point");
-        }
-
-        if(!(p1 == p2)) {
-            throw std::string("Validation failed: points do not match");
-        }
-    }
-}
-
-
-
 secp256k1::uint256 CLKeySearchDevice::readBigInt(unsigned int *src, int idx)
 {
     unsigned int value[8] = {0};
@@ -490,66 +418,65 @@ void CLKeySearchDevice::initializeBasePoints()
     std::vector<secp256k1::ecpoint> table;
 
     table.push_back(secp256k1::G());
-    for(uint64_t i = 1; i < 256; i++) {
+    for (uint64_t i = 1; i < 256; i++) {
 
         secp256k1::ecpoint p = doublePoint(table[i - 1]);
-        if(!pointExists(p)) {
+#ifdef DEBUG
+        if (!pointExists(p)) {
             throw std::string("Point does not exist!");
         }
+#endif
         table.push_back(p);
     }
 
     size_t count = 256;
 
-    unsigned int *tmpX = new unsigned int[count * 8];
-    unsigned int *tmpY = new unsigned int[count * 8];
+    unsigned int* tmpX = new unsigned int[count * 8];
+    unsigned int* tmpY = new unsigned int[count * 8];
 
-    for(int i = 0; i < 256; i++) {
+    for (int i = 0; i < 256; i++) {
         unsigned int bufX[8];
         unsigned int bufY[8];
         table[i].x.exportWords(bufX, 8, secp256k1::uint256::BigEndian);
         table[i].y.exportWords(bufY, 8, secp256k1::uint256::BigEndian);
 
-        for(int j = 0; j < 8; j++) {
+        for (int j = 0; j < 8; j++) {
             tmpX[i * 8 + j] = bufX[j];
             tmpY[i * 8 + j] = bufY[j];
         }
     }
 
+    table.clear();
+    table.shrink_to_fit();
+
     _clContext->copyHostToDevice(tmpX, _xTable, count * 8 * sizeof(unsigned int));
+    delete[] tmpX;
 
     _clContext->copyHostToDevice(tmpY, _yTable, count * 8 * sizeof(unsigned int));
+    delete[] tmpY;
 }
 
-
-
 void CLKeySearchDevice::generateStartingPoints()
 {
     uint64_t totalPoints = (uint64_t)_points;
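+    // TODO: the 40 bytes/point factor is unexplained; it feeds the size printed in the log message below, while _pointsMemSize counts 24 unsigned ints per point. Reconcile the two.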
     uint64_t totalMemory = totalPoints * 40;
 
-    std::vector<secp256k1::uint256> exponents;
-
     initializeBasePoints();
 
     _pointsMemSize = totalPoints * sizeof(unsigned int) * 16 + _points * sizeof(unsigned int) * 8;
 
     Logger::log(LogLevel::Info, "Generating " + util::formatThousands(totalPoints) + " starting points (" + util::format("%.1f", (double)totalMemory / (double)(1024 * 1024)) + "MB)");
 
+    unsigned int *privateKeys = new unsigned int[8 * totalPoints];
+
     // Generate key pairs for k, k+1, k+2 ... k + <total points in parallel - 1>
     secp256k1::uint256 privKey = _start;
-
-    exponents.push_back(privKey);
-
+    splatBigInt(privateKeys, 0, privKey);
+    
     for(uint64_t i = 1; i < totalPoints; i++) {
         privKey = privKey.add(_stride);
-        exponents.push_back(privKey);
-    }
-
-    unsigned int *privateKeys = new unsigned int[8 * totalPoints];
-
-    for(int index = 0; index < _points; index++) {
-        splatBigInt(privateKeys, index, exponents[index]);
+        splatBigInt(privateKeys, i, privKey);
     }
 
     // Copy to device
@@ -563,7 +490,7 @@ void CLKeySearchDevice::generateStartingPoints()
         _initKeysKernel->set_args(_points, i, _privateKeys, _chain, _xTable, _yTable, _x, _y);
         _initKeysKernel->call(_blocks, _threads);
 
-        if(((double)(i+1) / 256.0) * 100.0 >= pct) {
+        if(((double)(i+1.0) / 256.0) * 100.0 >= pct) {
             Logger::log(LogLevel::Info, util::format("%.1f%%", pct));
             pct += 10.0;
         }
@@ -575,7 +502,5 @@ void CLKeySearchDevice::generateStartingPoints()
 
 secp256k1::uint256 CLKeySearchDevice::getNextKey()
 {
-    uint64_t totalPoints = (uint64_t)_points * _threads * _blocks;
-
-    return _start + secp256k1::uint256(totalPoints) * _iterations * _stride;
+    return _start + secp256k1::uint256((uint64_t)_points) * _iterations * _stride;
 }
\ No newline at end of file
diff --git a/CLKeySearchDevice/CLKeySearchDevice.h b/CLKeySearchDevice/CLKeySearchDevice.h
index 8a5497d..876a2ec 100644
--- a/CLKeySearchDevice/CLKeySearchDevice.h
+++ b/CLKeySearchDevice/CLKeySearchDevice.h
@@ -1,5 +1,5 @@
-#ifndef _CL_KEYSEARCH_DEVICE_H
-#define _CL_KEYSEARCH_DEVICE_H
+#ifndef CL_KEYSEARCH_DEVICE_H
+#define CL_KEYSEARCH_DEVICE_H
 
 #include "KeySearchDevice.h"
 #include "clContext.h"
@@ -18,10 +18,10 @@ class CLKeySearchDevice : public KeySearchDevice {
     cl::CLProgram *_clProgram = NULL;
     cl::CLKernel *_initKeysKernel = NULL;
     cl::CLKernel *_stepKernel = NULL;
-    cl::CLKernel *_stepKernelWithDouble = NULL;
 
     uint64_t _globalMemSize = 0;
     uint64_t _pointsMemSize = 0;
+    uint64_t _bufferMemSize = 0;
     uint64_t _targetMemSize = 0;
 
     CLTargetList _deviceTargetList;
@@ -81,14 +81,9 @@ class CLKeySearchDevice : public KeySearchDevice {
 
     void initializeBasePoints();
 
-    int getIndex(int block, int thread, int idx);
-
-    void splatBigInt(unsigned int *dest, int block, int thread, int idx, const secp256k1::uint256 &i);
     void splatBigInt(unsigned int *dest, int idx, secp256k1::uint256 &k);
     secp256k1::uint256 readBigInt(unsigned int *src, int idx);
 
-    void selfTest();
-
     bool _useBloomFilter = false;
 
     void setTargetsInternal();
@@ -101,15 +96,13 @@ class CLKeySearchDevice : public KeySearchDevice {
 
     void removeTargetFromList(const unsigned int hash[5]);
 
-    uint32_t getPrivateKeyOffset(int thread, int block, int idx);
-
     void initializeBloomFilter(const std::vector<struct hash160> &targets, uint64_t mask);
 
     uint64_t getOptimalBloomFilterMask(double p, size_t n);
 
 public:
 
-    CLKeySearchDevice(uint64_t device, int threads, int pointsPerThread, int blocks = 0);
+    CLKeySearchDevice(uint64_t device, int threads, int pointsPerThread, int blocks = 0, int compressionMode = PointCompressionType::COMPRESSED);
     ~CLKeySearchDevice();
 
 
@@ -138,4 +131,3 @@ class CLKeySearchDevice : public KeySearchDevice {
 };
 
 #endif
-
diff --git a/CLKeySearchDevice/CLKeySearchDevice.vcxproj b/CLKeySearchDevice/CLKeySearchDevice.vcxproj
index 4326e0f..6ea2683 100644
--- a/CLKeySearchDevice/CLKeySearchDevice.vcxproj
+++ b/CLKeySearchDevice/CLKeySearchDevice.vcxproj
@@ -5,6 +5,14 @@
       <Configuration>Debug</Configuration>
       <Platform>Win32</Platform>
     </ProjectConfiguration>
+    <ProjectConfiguration Include="Performance Release|Win32">
+      <Configuration>Performance Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Performance Release|x64">
+      <Configuration>Performance Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
     <ProjectConfiguration Include="Release|Win32">
       <Configuration>Release</Configuration>
       <Platform>Win32</Platform>
@@ -28,26 +36,40 @@
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
+    <PlatformToolset>ClangCl</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>ClangCl</PlatformToolset>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
+    <PlatformToolset>ClangCl</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v142</PlatformToolset>
+    <PlatformToolset>ClangCl</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>ClangCl</PlatformToolset>
     <CharacterSet>MultiByte</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PreferredToolArchitecture>x64</PreferredToolArchitecture>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v142</PlatformToolset>
+    <PlatformToolset>ClangCL</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
@@ -60,6 +82,10 @@
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
     <Import Project="..\BitCrack.props" />
   </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Performance Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="..\BitCrack.props" />
+  </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
     <Import Project="..\BitCrack.props" />
@@ -68,6 +94,10 @@
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
     <Import Project="..\BitCrack.props" />
   </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Performance Release|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="..\BitCrack.props" />
+  </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
     <Import Project="..\BitCrack.props" />
@@ -94,7 +124,7 @@
       </Command>
     </PostBuildEvent>
     <PreBuildEvent>
-      <Command>type $(SolutionDir)clMath\ripemd160.cl $(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl keysearch.cl &gt; bitcrack.cl
+      <Command>type $(SolutionDir)clMath\ripemd160.cl $(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl $(SolutionDir)CLKeySearchDevice\bitcoin.cl $(SolutionDir)CLKeySearchDevice\bloomfilter.cl $(SolutionDir)CLKeySearchDevice\keysearch.cl &gt; bitcrack.cl
 $(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl</Command>
       <Message>Embed bitcrack.cl into bitcrack_cl.cpp</Message>
     </PreBuildEvent>
@@ -109,7 +139,22 @@ $(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl</Comma
       <PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
     </ClCompile>
     <PreBuildEvent>
-      <Command>type ripemd160.cl secp256k1.cl sha256.cl keysearch.cl &gt; bitcrack.cl
+      <Command>type $(SolutionDir)clMath\ripemd160.cl $(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl $(SolutionDir)CLKeySearchDevice\bitcoin.cl $(SolutionDir)CLKeySearchDevice\bloomfilter.cl $(SolutionDir)CLKeySearchDevice\keysearch.cl &gt; bitcrack.cl
+$(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl</Command>
+      <Message>Embed bitcrack.cl into bitcrack_cl.cpp</Message>
+    </PreBuildEvent>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>MaxSpeed</Optimization> <!-- performance configuration: optimise for speed, as Performance Release|x64 does -->
+      <SDLCheck>true</SDLCheck>
+      <ConformanceMode>true</ConformanceMode>
+      <AdditionalIncludeDirectories>$(SolutionDir)\KeyFinderLib;$(SolutionDir)\clUtil;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.2\include;$(SolutionDir)\secp256k1lib;$(SolutionDir)\Logger;$(SolutionDir)\util;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+    </ClCompile>
+    <PreBuildEvent>
+      <Command>type $(SolutionDir)clMath\ripemd160.cl $(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl $(SolutionDir)CLKeySearchDevice\bitcoin.cl $(SolutionDir)CLKeySearchDevice\bloomfilter.cl $(SolutionDir)CLKeySearchDevice\keysearch.cl &gt; bitcrack.cl
 $(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl</Command>
       <Message>Embed bitcrack.cl into bitcrack_cl.cpp</Message>
     </PreBuildEvent>
@@ -128,7 +173,33 @@ $(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl</Comma
       </Command>
     </PostBuildEvent>
     <PreBuildEvent>
-      <Command>type ripemd160.cl secp256k1.cl sha256.cl keysearch.cl &gt; bitcrack.cl
+      <Command>type $(SolutionDir)clMath\ripemd160.cl $(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl $(SolutionDir)CLKeySearchDevice\bitcoin.cl $(SolutionDir)CLKeySearchDevice\bloomfilter.cl $(SolutionDir)CLKeySearchDevice\keysearch.cl &gt; bitcrack.cl
+$(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl</Command>
+      <Message>Embed bitcrack.cl into bitcrack_cl.cpp</Message>
+    </PreBuildEvent>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|x64'">
+    <ClCompile> <!-- compiler settings for Performance Release|x64: diagnostics, preprocessor, optimisation, debug info -->
+      <WarningLevel>Level3</WarningLevel>
+      <SDLCheck>true</SDLCheck>
+      <ConformanceMode>true</ConformanceMode>
+      <PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>$(SolutionDir)\KeyFinderLib;$(SolutionDir)\clUtil;$(OPENCL_INCLUDE);$(SolutionDir)\secp256k1lib;$(SolutionDir)\Logger;$(SolutionDir)\util;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <Optimization>MaxSpeed</Optimization>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <DebugInformationFormat>None</DebugInformationFormat>
+    </ClCompile>
+    <PostBuildEvent>
+      <Command>
+      </Command>
+    </PostBuildEvent>
+    <PreBuildEvent>
+      <Command>type $(SolutionDir)clMath\ripemd160.cl $(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl $(SolutionDir)CLKeySearchDevice\bitcoin.cl $(SolutionDir)CLKeySearchDevice\bloomfilter.cl $(SolutionDir)CLKeySearchDevice\keysearch.cl &gt; bitcrack.cl
 $(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl</Command>
       <Message>Embed bitcrack.cl into bitcrack_cl.cpp</Message>
     </PreBuildEvent>
@@ -149,7 +220,7 @@ $(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl</Comma
       <OptimizeReferences>true</OptimizeReferences>
     </Link>
     <PreBuildEvent>
-      <Command>type ripemd160.cl secp256k1.cl sha256.cl keysearch.cl &gt; bitcrack.cl
+      <Command>type $(SolutionDir)clMath\ripemd160.cl $(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl $(SolutionDir)CLKeySearchDevice\bitcoin.cl $(SolutionDir)CLKeySearchDevice\bloomfilter.cl $(SolutionDir)CLKeySearchDevice\keysearch.cl &gt; bitcrack.cl
 $(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl</Command>
       <Message>Embed bitcrack.cl into bitcrack_cl.cpp</Message>
     </PreBuildEvent>
@@ -162,6 +233,7 @@ $(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl</Comma
     <ClCompile Include="CLKeySearchDevice.cpp" />
   </ItemGroup>
   <ItemGroup>
+    <None Include="bitcrack.cl" />
     <None Include="keysearch.cl" />
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
diff --git a/CLKeySearchDevice/Makefile b/CLKeySearchDevice/Makefile
index 2da1746..41b85dc 100644
--- a/CLKeySearchDevice/Makefile
+++ b/CLKeySearchDevice/Makefile
@@ -2,7 +2,7 @@ NAME=CLKeySearchDevice
 CPPSRC:=$(wildcard *.cpp)
 
 all:
-	cat ../clMath/sha256.cl ../clMath/secp256k1.cl ../clMath/ripemd160.cl keysearch.cl > bitcrack.cl
+	cat ../clMath/sha256.cl ../clMath/secp256k1.cl ../clMath/ripemd160.cl bloomfilter.cl bitcoin.cl keysearch.cl > bitcrack.cl
 	${BINDIR}/embedcl bitcrack.cl bitcrack_cl.cpp _bitcrack_cl
 
 	for file in ${CPPSRC} bitcrack_cl.cpp; do\
diff --git a/CLKeySearchDevice/bitcoin.cl b/CLKeySearchDevice/bitcoin.cl
new file mode 100644
index 0000000..3da6dbb
--- /dev/null
+++ b/CLKeySearchDevice/bitcoin.cl
@@ -0,0 +1,54 @@
+#ifndef BITCOIN_CL
+#define BITCOIN_CL
+
+// Reverses the byte order of a 32-bit unsigned word (skipped if already defined).
+// Fully parenthesized so the expansion stays one operand inside larger expressions.
+#ifndef endian
+#define endian(x) (((x) << 24) | (((x) << 8) & 0x00ff0000) | (((x) >> 8) & 0x0000ff00) | ((x) >> 24))
+#endif
+
+// Runs sha256PublicKeyCompressed on x.v and yParity, byte-swaps the eight result
+// words, then hands them to ripemd160sha256NoFinal, which fills the five words of
+// digest. NOTE(review): presumably the caller applies the final RIPEMD-160 step - confirm.
+void hashPublicKeyCompressed(const uint256_t x, const unsigned int yParity, unsigned int digest[5])
+{
+    __private unsigned int hash[8];
+
+    sha256PublicKeyCompressed(x.v, yParity, hash);
+
+    // Swap to little-endian
+    hash[0] = endian(hash[0]);
+    hash[1] = endian(hash[1]);
+    hash[2] = endian(hash[2]);
+    hash[3] = endian(hash[3]);
+    hash[4] = endian(hash[4]);
+    hash[5] = endian(hash[5]);
+    hash[6] = endian(hash[6]);
+    hash[7] = endian(hash[7]);
+
+    ripemd160sha256NoFinal(hash, digest);
+}
+
+// Runs sha256PublicKey on x.v and y.v, byte-swaps the eight result words, then
+// hands them to ripemd160sha256NoFinal, which fills the five words of digest.
+// NOTE(review): presumably the caller applies the final RIPEMD-160 step - confirm.
+void hashPublicKey(const uint256_t x, const uint256_t y, unsigned int digest[5])
+{
+    __private unsigned int hash[8];
+
+    sha256PublicKey(x.v, y.v, hash);
+
+    // Swap to little-endian
+    hash[0] = endian(hash[0]);
+    hash[1] = endian(hash[1]);
+    hash[2] = endian(hash[2]);
+    hash[3] = endian(hash[3]);
+    hash[4] = endian(hash[4]);
+    hash[5] = endian(hash[5]);
+    hash[6] = endian(hash[6]);
+    hash[7] = endian(hash[7]);
+
+    ripemd160sha256NoFinal(hash, digest);
+}
+
+#endif
diff --git a/CLKeySearchDevice/bitcrack.cl b/CLKeySearchDevice/bitcrack.cl
index 974faef..32ac980 100644
--- a/CLKeySearchDevice/bitcrack.cl
+++ b/CLKeySearchDevice/bitcrack.cl
@@ -1,24 +1,17 @@
-#ifndef _RIPEMD160_CL
-#define _RIPEMD160_CL
+#ifndef RIPEMD160_CL
+#define RIPEMD160_CL
 
+#ifndef endian
+#define endian(x) ((x) << 24) | (((x) << 8) & 0x00ff0000) | (((x) >> 8) & 0x0000ff00) | ((x) >> 24)
+#endif
 
-__constant unsigned int _RIPEMD160_IV[5] = {
-    0x67452301,
-    0xefcdab89,
-    0x98badcfe,
-    0x10325476,
-    0xc3d2e1f0
+__constant unsigned int RIPEMD160_IV[5] = {
+    0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0,
 };
 
-__constant unsigned int _K0 = 0x5a827999;
-__constant unsigned int _K1 = 0x6ed9eba1;
-__constant unsigned int _K2 = 0x8f1bbcdc;
-__constant unsigned int _K3 = 0xa953fd4e;
-
-__constant unsigned int _K4 = 0x7a6d76e9;
-__constant unsigned int _K5 = 0x6d703ef3;
-__constant unsigned int _K6 = 0x5c4dd124;
-__constant unsigned int _K7 = 0x50a28be6;
+__constant unsigned int K[8] = {
+    0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xa953fd4e, 0x7a6d76e9, 0x6d703ef3, 0x5c4dd124, 0x50a28be6
+};
 
 #define rotl(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
 
@@ -38,22 +31,22 @@ __constant unsigned int _K7 = 0x50a28be6;
     c = rotl((c), 10)
 
 #define GG(a, b, c, d, e, x, s)\
-    a += G((b), (c), (d)) + (x) + _K0;\
+    a += G((b), (c), (d)) + (x) + K[0];\
     a = rotl((a), (s)) + (e);\
     c = rotl((c), 10)
 
 #define HH(a, b, c, d, e, x, s)\
-    a += H((b), (c), (d)) + (x) + _K1;\
+    a += H((b), (c), (d)) + (x) + K[1];\
     a = rotl((a), (s)) + (e);\
     c = rotl((c), 10)
 
 #define II(a, b, c, d, e, x, s)\
-    a += I((b), (c), (d)) + (x) + _K2;\
+    a += I((b), (c), (d)) + (x) + K[2];\
     a = rotl((a), (s)) + e;\
     c = rotl((c), 10)
 
 #define JJ(a, b, c, d, e, x, s)\
-    a += J((b), (c), (d)) + (x) + _K3;\
+    a += J((b), (c), (d)) + (x) + K[3];\
     a = rotl((a), (s)) + (e);\
     c = rotl((c), 10)
 
@@ -63,523 +56,298 @@ __constant unsigned int _K7 = 0x50a28be6;
     c = rotl((c), 10)
 
 #define GGG(a, b, c, d, e, x, s)\
-    a += G((b), (c), (d)) + x + _K4;\
+    a += G((b), (c), (d)) + x + K[4];\
     a = rotl((a), (s)) + (e);\
     c = rotl((c), 10)
 
 #define HHH(a, b, c, d, e, x, s)\
-    a += H((b), (c), (d)) + (x) + _K5;\
+    a += H((b), (c), (d)) + (x) + K[5];\
     a = rotl((a), (s)) + (e);\
     c = rotl((c), 10)
 
 #define III(a, b, c, d, e, x, s)\
-    a += I((b), (c), (d)) + (x) + _K6;\
+    a += I((b), (c), (d)) + (x) + K[6];\
     a = rotl((a), (s)) + (e);\
     c = rotl((c), 10)
 
 #define JJJ(a, b, c, d, e, x, s)\
-    a += J((b), (c), (d)) + (x) + _K7;\
+    a += J((b), (c), (d)) + (x) + K[7];\
     a = rotl((a), (s)) + (e);\
     c = rotl((c), 10)
 
-
-void ripemd160sha256(const unsigned int x[8], unsigned int digest[5])
+void ripemd160p1(const unsigned int x[8], unsigned int digest[5])
 {
-    unsigned int a1 = _RIPEMD160_IV[0];
-    unsigned int b1 = _RIPEMD160_IV[1];
-    unsigned int c1 = _RIPEMD160_IV[2];
-    unsigned int d1 = _RIPEMD160_IV[3];
-    unsigned int e1 = _RIPEMD160_IV[4];
-
-    const unsigned int x8 = 0x00000080;
-    const unsigned int x14 = 256;
+    __private unsigned int a = RIPEMD160_IV[0];
+    __private unsigned int b = RIPEMD160_IV[1];
+    __private unsigned int c = RIPEMD160_IV[2];
+    __private unsigned int d = RIPEMD160_IV[3];
+    __private unsigned int e = RIPEMD160_IV[4];
 
     /* round 1 */
-    FF(a1, b1, c1, d1, e1, x[0], 11);
-    FF(e1, a1, b1, c1, d1, x[1], 14);
-    FF(d1, e1, a1, b1, c1, x[2], 15);
-    FF(c1, d1, e1, a1, b1, x[3], 12);
-    FF(b1, c1, d1, e1, a1, x[4], 5);
-    FF(a1, b1, c1, d1, e1, x[5], 8);
-    FF(e1, a1, b1, c1, d1, x[6], 7);
-    FF(d1, e1, a1, b1, c1, x[7], 9);
-    FF(c1, d1, e1, a1, b1, x8, 11);
-    FF(b1, c1, d1, e1, a1, 0, 13);
-    FF(a1, b1, c1, d1, e1, 0, 14);
-    FF(e1, a1, b1, c1, d1, 0, 15);
-    FF(d1, e1, a1, b1, c1, 0, 6);
-    FF(c1, d1, e1, a1, b1, 0, 7);
-    FF(b1, c1, d1, e1, a1, x14, 9);
-    FF(a1, b1, c1, d1, e1, 0, 8);
+    FF(a, b, c, d, e, x[0], 11);
+    FF(e, a, b, c, d, x[1], 14);
+    FF(d, e, a, b, c, x[2], 15);
+    FF(c, d, e, a, b, x[3], 12);
+    FF(b, c, d, e, a, x[4], 5);
+    FF(a, b, c, d, e, x[5], 8);
+    FF(e, a, b, c, d, x[6], 7);
+    FF(d, e, a, b, c, x[7], 9);
+    FF(c, d, e, a, b, 128, 11);
+    FF(b, c, d, e, a, 0, 13);
+    FF(a, b, c, d, e, 0, 14);
+    FF(e, a, b, c, d, 0, 15);
+    FF(d, e, a, b, c, 0, 6);
+    FF(c, d, e, a, b, 0, 7);
+    FF(b, c, d, e, a, 256, 9);
+    FF(a, b, c, d, e, 0, 8);
 
     /* round 2 */
-    GG(e1, a1, b1, c1, d1, x[7], 7);
-    GG(d1, e1, a1, b1, c1, x[4], 6);
-    GG(c1, d1, e1, a1, b1, 0, 8);
-    GG(b1, c1, d1, e1, a1, x[1], 13);
-    GG(a1, b1, c1, d1, e1, 0, 11);
-    GG(e1, a1, b1, c1, d1, x[6], 9);
-    GG(d1, e1, a1, b1, c1, 0, 7);
-    GG(c1, d1, e1, a1, b1, x[3], 15);
-    GG(b1, c1, d1, e1, a1, 0, 7);
-    GG(a1, b1, c1, d1, e1, x[0], 12);
-    GG(e1, a1, b1, c1, d1, 0, 15);
-    GG(d1, e1, a1, b1, c1, x[5], 9);
-    GG(c1, d1, e1, a1, b1, x[2], 11);
-    GG(b1, c1, d1, e1, a1, x14, 7);
-    GG(a1, b1, c1, d1, e1, 0, 13);
-    GG(e1, a1, b1, c1, d1, x8, 12);
+    GG(e, a, b, c, d, x[7], 7);
+    GG(d, e, a, b, c, x[4], 6);
+    GG(c, d, e, a, b, 0, 8);
+    GG(b, c, d, e, a, x[1], 13);
+    GG(a, b, c, d, e, 0, 11);
+    GG(e, a, b, c, d, x[6], 9);
+    GG(d, e, a, b, c, 0, 7);
+    GG(c, d, e, a, b, x[3], 15);
+    GG(b, c, d, e, a, 0, 7);
+    GG(a, b, c, d, e, x[0], 12);
+    GG(e, a, b, c, d, 0, 15);
+    GG(d, e, a, b, c, x[5], 9);
+    GG(c, d, e, a, b, x[2], 11);
+    GG(b, c, d, e, a, 256, 7);
+    GG(a, b, c, d, e, 0, 13);
+    GG(e, a, b, c, d, 0x80, 12);
 
     /* round 3 */
-    HH(d1, e1, a1, b1, c1, x[3], 11);
-    HH(c1, d1, e1, a1, b1, 0, 13);
-    HH(b1, c1, d1, e1, a1, x14, 6);
-    HH(a1, b1, c1, d1, e1, x[4], 7);
-    HH(e1, a1, b1, c1, d1, 0, 14);
-    HH(d1, e1, a1, b1, c1, 0, 9);
-    HH(c1, d1, e1, a1, b1, x8, 13);
-    HH(b1, c1, d1, e1, a1, x[1], 15);
-    HH(a1, b1, c1, d1, e1, x[2], 14);
-    HH(e1, a1, b1, c1, d1, x[7], 8);
-    HH(d1, e1, a1, b1, c1, x[0], 13);
-    HH(c1, d1, e1, a1, b1, x[6], 6);
-    HH(b1, c1, d1, e1, a1, 0, 5);
-    HH(a1, b1, c1, d1, e1, 0, 12);
-    HH(e1, a1, b1, c1, d1, x[5], 7);
-    HH(d1, e1, a1, b1, c1, 0, 5);
+    HH(d, e, a, b, c, x[3], 11);
+    HH(c, d, e, a, b, 0, 13);
+    HH(b, c, d, e, a, 256, 6);
+    HH(a, b, c, d, e, x[4], 7);
+    HH(e, a, b, c, d, 0, 14);
+    HH(d, e, a, b, c, 0, 9);
+    HH(c, d, e, a, b, 0x80, 13);
+    HH(b, c, d, e, a, x[1], 15);
+    HH(a, b, c, d, e, x[2], 14);
+    HH(e, a, b, c, d, x[7], 8);
+    HH(d, e, a, b, c, x[0], 13);
+    HH(c, d, e, a, b, x[6], 6);
+    HH(b, c, d, e, a, 0, 5);
+    HH(a, b, c, d, e, 0, 12);
+    HH(e, a, b, c, d, x[5], 7);
+    HH(d, e, a, b, c, 0, 5);
 
     /* round 4 */
-    II(c1, d1, e1, a1, b1, x[1], 11);
-    II(b1, c1, d1, e1, a1, 0, 12);
-    II(a1, b1, c1, d1, e1, 0, 14);
-    II(e1, a1, b1, c1, d1, 0, 15);
-    II(d1, e1, a1, b1, c1, x[0], 14);
-    II(c1, d1, e1, a1, b1, x8, 15);
-    II(b1, c1, d1, e1, a1, 0, 9);
-    II(a1, b1, c1, d1, e1, x[4], 8);
-    II(e1, a1, b1, c1, d1, 0, 9);
-    II(d1, e1, a1, b1, c1, x[3], 14);
-    II(c1, d1, e1, a1, b1, x[7], 5);
-    II(b1, c1, d1, e1, a1, 0, 6);
-    II(a1, b1, c1, d1, e1, x14, 8);
-    II(e1, a1, b1, c1, d1, x[5], 6);
-    II(d1, e1, a1, b1, c1, x[6], 5);
-    II(c1, d1, e1, a1, b1, x[2], 12);
+    II(c, d, e, a, b, x[1], 11);
+    II(b, c, d, e, a, 0, 12);
+    II(a, b, c, d, e, 0, 14);
+    II(e, a, b, c, d, 0, 15);
+    II(d, e, a, b, c, x[0], 14);
+    II(c, d, e, a, b, 0x80, 15);
+    II(b, c, d, e, a, 0, 9);
+    II(a, b, c, d, e, x[4], 8);
+    II(e, a, b, c, d, 0, 9);
+    II(d, e, a, b, c, x[3], 14);
+    II(c, d, e, a, b, x[7], 5);
+    II(b, c, d, e, a, 0, 6);
+    II(a, b, c, d, e, 256, 8);
+    II(e, a, b, c, d, x[5], 6);
+    II(d, e, a, b, c, x[6], 5);
+    II(c, d, e, a, b, x[2], 12);
 
     /* round 5 */
-    JJ(b1, c1, d1, e1, a1, x[4], 9);
-    JJ(a1, b1, c1, d1, e1, x[0], 15);
-    JJ(e1, a1, b1, c1, d1, x[5], 5);
-    JJ(d1, e1, a1, b1, c1, 0, 11);
-    JJ(c1, d1, e1, a1, b1, x[7], 6);
-    JJ(b1, c1, d1, e1, a1, 0, 8);
-    JJ(a1, b1, c1, d1, e1, x[2], 13);
-    JJ(e1, a1, b1, c1, d1, 0, 12);
-    JJ(d1, e1, a1, b1, c1, x14, 5);
-    JJ(c1, d1, e1, a1, b1, x[1], 12);
-    JJ(b1, c1, d1, e1, a1, x[3], 13);
-    JJ(a1, b1, c1, d1, e1, x8, 14);
-    JJ(e1, a1, b1, c1, d1, 0, 11);
-    JJ(d1, e1, a1, b1, c1, x[6], 8);
-    JJ(c1, d1, e1, a1, b1, 0, 5);
-    JJ(b1, c1, d1, e1, a1, 0, 6);
-
-    unsigned int a2 = _RIPEMD160_IV[0];
-    unsigned int b2 = _RIPEMD160_IV[1];
-    unsigned int c2 = _RIPEMD160_IV[2];
-    unsigned int d2 = _RIPEMD160_IV[3];
-    unsigned int e2 = _RIPEMD160_IV[4];
+    JJ(b, c, d, e, a, x[4], 9);
+    JJ(a, b, c, d, e, x[0], 15);
+    JJ(e, a, b, c, d, x[5], 5);
+    JJ(d, e, a, b, c, 0, 11);
+    JJ(c, d, e, a, b, x[7], 6);
+    JJ(b, c, d, e, a, 0, 8);
+    JJ(a, b, c, d, e, x[2], 13);
+    JJ(e, a, b, c, d, 0, 12);
+    JJ(d, e, a, b, c, 256, 5);
+    JJ(c, d, e, a, b, x[1], 12);
+    JJ(b, c, d, e, a, x[3], 13);
+    JJ(a, b, c, d, e, 0x80, 14);
+    JJ(e, a, b, c, d, 0, 11);
+    JJ(d, e, a, b, c, x[6], 8);
+    JJ(c, d, e, a, b, 0, 5);
+    JJ(b, c, d, e, a, 0, 6);
+
+    digest[0] = c;
+    digest[1] = d;
+    digest[2] = e;
+    digest[3] = a;
+    digest[4] = b;
+}
+
+void ripemd160p2(const unsigned int x[8], unsigned int digest[5])
+{
+    __private unsigned int a = RIPEMD160_IV[0];
+    __private unsigned int b = RIPEMD160_IV[1];
+    __private unsigned int c = RIPEMD160_IV[2];
+    __private unsigned int d = RIPEMD160_IV[3];
+    __private unsigned int e = RIPEMD160_IV[4];
 
     /* parallel round 1 */
-    JJJ(a2, b2, c2, d2, e2, x[5], 8);
-    JJJ(e2, a2, b2, c2, d2, x14, 9);
-    JJJ(d2, e2, a2, b2, c2, x[7], 9);
-    JJJ(c2, d2, e2, a2, b2, x[0], 11);
-    JJJ(b2, c2, d2, e2, a2, 0, 13);
-    JJJ(a2, b2, c2, d2, e2, x[2], 15);
-    JJJ(e2, a2, b2, c2, d2, 0, 15);
-    JJJ(d2, e2, a2, b2, c2, x[4], 5);
-    JJJ(c2, d2, e2, a2, b2, 0, 7);
-    JJJ(b2, c2, d2, e2, a2, x[6], 7);
-    JJJ(a2, b2, c2, d2, e2, 0, 8);
-    JJJ(e2, a2, b2, c2, d2, x8, 11);
-    JJJ(d2, e2, a2, b2, c2, x[1], 14);
-    JJJ(c2, d2, e2, a2, b2, 0, 14);
-    JJJ(b2, c2, d2, e2, a2, x[3], 12);
-    JJJ(a2, b2, c2, d2, e2, 0, 6);
+    JJJ(a, b, c, d, e, x[5], 8);
+    JJJ(e, a, b, c, d, 256, 9);
+    JJJ(d, e, a, b, c, x[7], 9);
+    JJJ(c, d, e, a, b, x[0], 11);
+    JJJ(b, c, d, e, a, 0, 13);
+    JJJ(a, b, c, d, e, x[2], 15);
+    JJJ(e, a, b, c, d, 0, 15);
+    JJJ(d, e, a, b, c, x[4], 5);
+    JJJ(c, d, e, a, b, 0, 7);
+    JJJ(b, c, d, e, a, x[6], 7);
+    JJJ(a, b, c, d, e, 0, 8);
+    JJJ(e, a, b, c, d, 0x80, 11);
+    JJJ(d, e, a, b, c, x[1], 14);
+    JJJ(c, d, e, a, b, 0, 14);
+    JJJ(b, c, d, e, a, x[3], 12);
+    JJJ(a, b, c, d, e, 0, 6);
 
     /* parallel round 2 */
-    III(e2, a2, b2, c2, d2, x[6], 9);
-    III(d2, e2, a2, b2, c2, 0, 13);
-    III(c2, d2, e2, a2, b2, x[3], 15);
-    III(b2, c2, d2, e2, a2, x[7], 7);
-    III(a2, b2, c2, d2, e2, x[0], 12);
-    III(e2, a2, b2, c2, d2, 0, 8);
-    III(d2, e2, a2, b2, c2, x[5], 9);
-    III(c2, d2, e2, a2, b2, 0, 11);
-    III(b2, c2, d2, e2, a2, x14, 7);
-    III(a2, b2, c2, d2, e2, 0, 7);
-    III(e2, a2, b2, c2, d2, x8, 12);
-    III(d2, e2, a2, b2, c2, 0, 7);
-    III(c2, d2, e2, a2, b2, x[4], 6);
-    III(b2, c2, d2, e2, a2, 0, 15);
-    III(a2, b2, c2, d2, e2, x[1], 13);
-    III(e2, a2, b2, c2, d2, x[2], 11);
+    III(e, a, b, c, d, x[6], 9);
+    III(d, e, a, b, c, 0, 13);
+    III(c, d, e, a, b, x[3], 15);
+    III(b, c, d, e, a, x[7], 7);
+    III(a, b, c, d, e, x[0], 12);
+    III(e, a, b, c, d, 0, 8);
+    III(d, e, a, b, c, x[5], 9);
+    III(c, d, e, a, b, 0, 11);
+    III(b, c, d, e, a, 256, 7);
+    III(a, b, c, d, e, 0, 7);
+    III(e, a, b, c, d, 0x80, 12);
+    III(d, e, a, b, c, 0, 7);
+    III(c, d, e, a, b, x[4], 6);
+    III(b, c, d, e, a, 0, 15);
+    III(a, b, c, d, e, x[1], 13);
+    III(e, a, b, c, d, x[2], 11);
 
     /* parallel round 3 */
-    HHH(d2, e2, a2, b2, c2, 0, 9);
-    HHH(c2, d2, e2, a2, b2, x[5], 7);
-    HHH(b2, c2, d2, e2, a2, x[1], 15);
-    HHH(a2, b2, c2, d2, e2, x[3], 11);
-    HHH(e2, a2, b2, c2, d2, x[7], 8);
-    HHH(d2, e2, a2, b2, c2, x14, 6);
-    HHH(c2, d2, e2, a2, b2, x[6], 6);
-    HHH(b2, c2, d2, e2, a2, 0, 14);
-    HHH(a2, b2, c2, d2, e2, 0, 12);
-    HHH(e2, a2, b2, c2, d2, x8, 13);
-    HHH(d2, e2, a2, b2, c2, 0, 5);
-    HHH(c2, d2, e2, a2, b2, x[2], 14);
-    HHH(b2, c2, d2, e2, a2, 0, 13);
-    HHH(a2, b2, c2, d2, e2, x[0], 13);
-    HHH(e2, a2, b2, c2, d2, x[4], 7);
-    HHH(d2, e2, a2, b2, c2, 0, 5);
+    HHH(d, e, a, b, c, 0, 9);
+    HHH(c, d, e, a, b, x[5], 7);
+    HHH(b, c, d, e, a, x[1], 15);
+    HHH(a, b, c, d, e, x[3], 11);
+    HHH(e, a, b, c, d, x[7], 8);
+    HHH(d, e, a, b, c, 256, 6);
+    HHH(c, d, e, a, b, x[6], 6);
+    HHH(b, c, d, e, a, 0, 14);
+    HHH(a, b, c, d, e, 0, 12);
+    HHH(e, a, b, c, d, 0x80, 13);
+    HHH(d, e, a, b, c, 0, 5);
+    HHH(c, d, e, a, b, x[2], 14);
+    HHH(b, c, d, e, a, 0, 13);
+    HHH(a, b, c, d, e, x[0], 13);
+    HHH(e, a, b, c, d, x[4], 7);
+    HHH(d, e, a, b, c, 0, 5);
 
     /* parallel round 4 */
-    GGG(c2, d2, e2, a2, b2, x8, 15);
-    GGG(b2, c2, d2, e2, a2, x[6], 5);
-    GGG(a2, b2, c2, d2, e2, x[4], 8);
-    GGG(e2, a2, b2, c2, d2, x[1], 11);
-    GGG(d2, e2, a2, b2, c2, x[3], 14);
-    GGG(c2, d2, e2, a2, b2, 0, 14);
-    GGG(b2, c2, d2, e2, a2, 0, 6);
-    GGG(a2, b2, c2, d2, e2, x[0], 14);
-    GGG(e2, a2, b2, c2, d2, x[5], 6);
-    GGG(d2, e2, a2, b2, c2, 0, 9);
-    GGG(c2, d2, e2, a2, b2, x[2], 12);
-    GGG(b2, c2, d2, e2, a2, 0, 9);
-    GGG(a2, b2, c2, d2, e2, 0, 12);
-    GGG(e2, a2, b2, c2, d2, x[7], 5);
-    GGG(d2, e2, a2, b2, c2, 0, 15);
-    GGG(c2, d2, e2, a2, b2, x14, 8);
+    GGG(c, d, e, a, b, 0x80, 15);
+    GGG(b, c, d, e, a, x[6], 5);
+    GGG(a, b, c, d, e, x[4], 8);
+    GGG(e, a, b, c, d, x[1], 11);
+    GGG(d, e, a, b, c, x[3], 14);
+    GGG(c, d, e, a, b, 0, 14);
+    GGG(b, c, d, e, a, 0, 6);
+    GGG(a, b, c, d, e, x[0], 14);
+    GGG(e, a, b, c, d, x[5], 6);
+    GGG(d, e, a, b, c, 0, 9);
+    GGG(c, d, e, a, b, x[2], 12);
+    GGG(b, c, d, e, a, 0, 9);
+    GGG(a, b, c, d, e, 0, 12);
+    GGG(e, a, b, c, d, x[7], 5);
+    GGG(d, e, a, b, c, 0, 15);
+    GGG(c, d, e, a, b, 256, 8);
 
     /* parallel round 5 */
-    FFF(b2, c2, d2, e2, a2, 0, 8);
-    FFF(a2, b2, c2, d2, e2, 0, 5);
-    FFF(e2, a2, b2, c2, d2, 0, 12);
-    FFF(d2, e2, a2, b2, c2, x[4], 9);
-    FFF(c2, d2, e2, a2, b2, x[1], 12);
-    FFF(b2, c2, d2, e2, a2, x[5], 5);
-    FFF(a2, b2, c2, d2, e2, x8, 14);
-    FFF(e2, a2, b2, c2, d2, x[7], 6);
-    FFF(d2, e2, a2, b2, c2, x[6], 8);
-    FFF(c2, d2, e2, a2, b2, x[2], 13);
-    FFF(b2, c2, d2, e2, a2, 0, 6);
-    FFF(a2, b2, c2, d2, e2, x14, 5);
-    FFF(e2, a2, b2, c2, d2, x[0], 15);
-    FFF(d2, e2, a2, b2, c2, x[3], 13);
-    FFF(c2, d2, e2, a2, b2, 0, 11);
-    FFF(b2, c2, d2, e2, a2, 0, 11);
-
-    digest[0] = _RIPEMD160_IV[1] + c1 + d2;
-    digest[1] = _RIPEMD160_IV[2] + d1 + e2;
-    digest[2] = _RIPEMD160_IV[3] + e1 + a2;
-    digest[3] = _RIPEMD160_IV[4] + a1 + b2;
-    digest[4] = _RIPEMD160_IV[0] + b1 + c2;
+    FFF(b, c, d, e, a, 0, 8);
+    FFF(a, b, c, d, e, 0, 5);
+    FFF(e, a, b, c, d, 0, 12);
+    FFF(d, e, a, b, c, x[4], 9);
+    FFF(c, d, e, a, b, x[1], 12);
+    FFF(b, c, d, e, a, x[5], 5);
+    FFF(a, b, c, d, e, 0x80, 14);
+    FFF(e, a, b, c, d, x[7], 6);
+    FFF(d, e, a, b, c, x[6], 8);
+    FFF(c, d, e, a, b, x[2], 13);
+    FFF(b, c, d, e, a, 0, 6);
+    FFF(a, b, c, d, e, 256, 5);
+    FFF(e, a, b, c, d, x[0], 15);
+    FFF(d, e, a, b, c, x[3], 13);
+    FFF(c, d, e, a, b, 0, 11);
+    FFF(b, c, d, e, a, 0, 11);
+
+    digest[0] = d;
+    digest[1] = e;
+    digest[2] = a;
+    digest[3] = b;
+    digest[4] = c;
 }
 
-
 void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5])
 {
-    unsigned int a1 = _RIPEMD160_IV[0];
-    unsigned int b1 = _RIPEMD160_IV[1];
-    unsigned int c1 = _RIPEMD160_IV[2];
-    unsigned int d1 = _RIPEMD160_IV[3];
-    unsigned int e1 = _RIPEMD160_IV[4];
-
-    const unsigned int x8 = 0x00000080;
-    const unsigned int x14 = 256;
-
-    /* round 1 */
-    FF(a1, b1, c1, d1, e1, x[0], 11);
-    FF(e1, a1, b1, c1, d1, x[1], 14);
-    FF(d1, e1, a1, b1, c1, x[2], 15);
-    FF(c1, d1, e1, a1, b1, x[3], 12);
-    FF(b1, c1, d1, e1, a1, x[4], 5);
-    FF(a1, b1, c1, d1, e1, x[5], 8);
-    FF(e1, a1, b1, c1, d1, x[6], 7);
-    FF(d1, e1, a1, b1, c1, x[7], 9);
-    FF(c1, d1, e1, a1, b1, x8, 11);
-    FF(b1, c1, d1, e1, a1, 0, 13);
-    FF(a1, b1, c1, d1, e1, 0, 14);
-    FF(e1, a1, b1, c1, d1, 0, 15);
-    FF(d1, e1, a1, b1, c1, 0, 6);
-    FF(c1, d1, e1, a1, b1, 0, 7);
-    FF(b1, c1, d1, e1, a1, x14, 9);
-    FF(a1, b1, c1, d1, e1, 0, 8);
-
-    /* round 2 */
-    GG(e1, a1, b1, c1, d1, x[7], 7);
-    GG(d1, e1, a1, b1, c1, x[4], 6);
-    GG(c1, d1, e1, a1, b1, 0, 8);
-    GG(b1, c1, d1, e1, a1, x[1], 13);
-    GG(a1, b1, c1, d1, e1, 0, 11);
-    GG(e1, a1, b1, c1, d1, x[6], 9);
-    GG(d1, e1, a1, b1, c1, 0, 7);
-    GG(c1, d1, e1, a1, b1, x[3], 15);
-    GG(b1, c1, d1, e1, a1, 0, 7);
-    GG(a1, b1, c1, d1, e1, x[0], 12);
-    GG(e1, a1, b1, c1, d1, 0, 15);
-    GG(d1, e1, a1, b1, c1, x[5], 9);
-    GG(c1, d1, e1, a1, b1, x[2], 11);
-    GG(b1, c1, d1, e1, a1, x14, 7);
-    GG(a1, b1, c1, d1, e1, 0, 13);
-    GG(e1, a1, b1, c1, d1, x8, 12);
-
-    /* round 3 */
-    HH(d1, e1, a1, b1, c1, x[3], 11);
-    HH(c1, d1, e1, a1, b1, 0, 13);
-    HH(b1, c1, d1, e1, a1, x14, 6);
-    HH(a1, b1, c1, d1, e1, x[4], 7);
-    HH(e1, a1, b1, c1, d1, 0, 14);
-    HH(d1, e1, a1, b1, c1, 0, 9);
-    HH(c1, d1, e1, a1, b1, x8, 13);
-    HH(b1, c1, d1, e1, a1, x[1], 15);
-    HH(a1, b1, c1, d1, e1, x[2], 14);
-    HH(e1, a1, b1, c1, d1, x[7], 8);
-    HH(d1, e1, a1, b1, c1, x[0], 13);
-    HH(c1, d1, e1, a1, b1, x[6], 6);
-    HH(b1, c1, d1, e1, a1, 0, 5);
-    HH(a1, b1, c1, d1, e1, 0, 12);
-    HH(e1, a1, b1, c1, d1, x[5], 7);
-    HH(d1, e1, a1, b1, c1, 0, 5);
+    __private unsigned int digest1[5];
+    __private unsigned int digest2[5];
 
-    /* round 4 */
-    II(c1, d1, e1, a1, b1, x[1], 11);
-    II(b1, c1, d1, e1, a1, 0, 12);
-    II(a1, b1, c1, d1, e1, 0, 14);
-    II(e1, a1, b1, c1, d1, 0, 15);
-    II(d1, e1, a1, b1, c1, x[0], 14);
-    II(c1, d1, e1, a1, b1, x8, 15);
-    II(b1, c1, d1, e1, a1, 0, 9);
-    II(a1, b1, c1, d1, e1, x[4], 8);
-    II(e1, a1, b1, c1, d1, 0, 9);
-    II(d1, e1, a1, b1, c1, x[3], 14);
-    II(c1, d1, e1, a1, b1, x[7], 5);
-    II(b1, c1, d1, e1, a1, 0, 6);
-    II(a1, b1, c1, d1, e1, x14, 8);
-    II(e1, a1, b1, c1, d1, x[5], 6);
-    II(d1, e1, a1, b1, c1, x[6], 5);
-    II(c1, d1, e1, a1, b1, x[2], 12);
-
-    /* round 5 */
-    JJ(b1, c1, d1, e1, a1, x[4], 9);
-    JJ(a1, b1, c1, d1, e1, x[0], 15);
-    JJ(e1, a1, b1, c1, d1, x[5], 5);
-    JJ(d1, e1, a1, b1, c1, 0, 11);
-    JJ(c1, d1, e1, a1, b1, x[7], 6);
-    JJ(b1, c1, d1, e1, a1, 0, 8);
-    JJ(a1, b1, c1, d1, e1, x[2], 13);
-    JJ(e1, a1, b1, c1, d1, 0, 12);
-    JJ(d1, e1, a1, b1, c1, x14, 5);
-    JJ(c1, d1, e1, a1, b1, x[1], 12);
-    JJ(b1, c1, d1, e1, a1, x[3], 13);
-    JJ(a1, b1, c1, d1, e1, x8, 14);
-    JJ(e1, a1, b1, c1, d1, 0, 11);
-    JJ(d1, e1, a1, b1, c1, x[6], 8);
-    JJ(c1, d1, e1, a1, b1, 0, 5);
-    JJ(b1, c1, d1, e1, a1, 0, 6);
-
-    unsigned int a2 = _RIPEMD160_IV[0];
-    unsigned int b2 = _RIPEMD160_IV[1];
-    unsigned int c2 = _RIPEMD160_IV[2];
-    unsigned int d2 = _RIPEMD160_IV[3];
-    unsigned int e2 = _RIPEMD160_IV[4];
-
-    /* parallel round 1 */
-    JJJ(a2, b2, c2, d2, e2, x[5], 8);
-    JJJ(e2, a2, b2, c2, d2, x14, 9);
-    JJJ(d2, e2, a2, b2, c2, x[7], 9);
-    JJJ(c2, d2, e2, a2, b2, x[0], 11);
-    JJJ(b2, c2, d2, e2, a2, 0, 13);
-    JJJ(a2, b2, c2, d2, e2, x[2], 15);
-    JJJ(e2, a2, b2, c2, d2, 0, 15);
-    JJJ(d2, e2, a2, b2, c2, x[4], 5);
-    JJJ(c2, d2, e2, a2, b2, 0, 7);
-    JJJ(b2, c2, d2, e2, a2, x[6], 7);
-    JJJ(a2, b2, c2, d2, e2, 0, 8);
-    JJJ(e2, a2, b2, c2, d2, x8, 11);
-    JJJ(d2, e2, a2, b2, c2, x[1], 14);
-    JJJ(c2, d2, e2, a2, b2, 0, 14);
-    JJJ(b2, c2, d2, e2, a2, x[3], 12);
-    JJJ(a2, b2, c2, d2, e2, 0, 6);
-
-    /* parallel round 2 */
-    III(e2, a2, b2, c2, d2, x[6], 9);
-    III(d2, e2, a2, b2, c2, 0, 13);
-    III(c2, d2, e2, a2, b2, x[3], 15);
-    III(b2, c2, d2, e2, a2, x[7], 7);
-    III(a2, b2, c2, d2, e2, x[0], 12);
-    III(e2, a2, b2, c2, d2, 0, 8);
-    III(d2, e2, a2, b2, c2, x[5], 9);
-    III(c2, d2, e2, a2, b2, 0, 11);
-    III(b2, c2, d2, e2, a2, x14, 7);
-    III(a2, b2, c2, d2, e2, 0, 7);
-    III(e2, a2, b2, c2, d2, x8, 12);
-    III(d2, e2, a2, b2, c2, 0, 7);
-    III(c2, d2, e2, a2, b2, x[4], 6);
-    III(b2, c2, d2, e2, a2, 0, 15);
-    III(a2, b2, c2, d2, e2, x[1], 13);
-    III(e2, a2, b2, c2, d2, x[2], 11);
-
-    /* parallel round 3 */
-    HHH(d2, e2, a2, b2, c2, 0, 9);
-    HHH(c2, d2, e2, a2, b2, x[5], 7);
-    HHH(b2, c2, d2, e2, a2, x[1], 15);
-    HHH(a2, b2, c2, d2, e2, x[3], 11);
-    HHH(e2, a2, b2, c2, d2, x[7], 8);
-    HHH(d2, e2, a2, b2, c2, x14, 6);
-    HHH(c2, d2, e2, a2, b2, x[6], 6);
-    HHH(b2, c2, d2, e2, a2, 0, 14);
-    HHH(a2, b2, c2, d2, e2, 0, 12);
-    HHH(e2, a2, b2, c2, d2, x8, 13);
-    HHH(d2, e2, a2, b2, c2, 0, 5);
-    HHH(c2, d2, e2, a2, b2, x[2], 14);
-    HHH(b2, c2, d2, e2, a2, 0, 13);
-    HHH(a2, b2, c2, d2, e2, x[0], 13);
-    HHH(e2, a2, b2, c2, d2, x[4], 7);
-    HHH(d2, e2, a2, b2, c2, 0, 5);
-
-    /* parallel round 4 */
-    GGG(c2, d2, e2, a2, b2, x8, 15);
-    GGG(b2, c2, d2, e2, a2, x[6], 5);
-    GGG(a2, b2, c2, d2, e2, x[4], 8);
-    GGG(e2, a2, b2, c2, d2, x[1], 11);
-    GGG(d2, e2, a2, b2, c2, x[3], 14);
-    GGG(c2, d2, e2, a2, b2, 0, 14);
-    GGG(b2, c2, d2, e2, a2, 0, 6);
-    GGG(a2, b2, c2, d2, e2, x[0], 14);
-    GGG(e2, a2, b2, c2, d2, x[5], 6);
-    GGG(d2, e2, a2, b2, c2, 0, 9);
-    GGG(c2, d2, e2, a2, b2, x[2], 12);
-    GGG(b2, c2, d2, e2, a2, 0, 9);
-    GGG(a2, b2, c2, d2, e2, 0, 12);
-    GGG(e2, a2, b2, c2, d2, x[7], 5);
-    GGG(d2, e2, a2, b2, c2, 0, 15);
-    GGG(c2, d2, e2, a2, b2, x14, 8);
+    ripemd160p1(x, digest1);
+    ripemd160p2(x, digest2);
 
-    /* parallel round 5 */
-    FFF(b2, c2, d2, e2, a2, 0, 8);
-    FFF(a2, b2, c2, d2, e2, 0, 5);
-    FFF(e2, a2, b2, c2, d2, 0, 12);
-    FFF(d2, e2, a2, b2, c2, x[4], 9);
-    FFF(c2, d2, e2, a2, b2, x[1], 12);
-    FFF(b2, c2, d2, e2, a2, x[5], 5);
-    FFF(a2, b2, c2, d2, e2, x8, 14);
-    FFF(e2, a2, b2, c2, d2, x[7], 6);
-    FFF(d2, e2, a2, b2, c2, x[6], 8);
-    FFF(c2, d2, e2, a2, b2, x[2], 13);
-    FFF(b2, c2, d2, e2, a2, 0, 6);
-    FFF(a2, b2, c2, d2, e2, x14, 5);
-    FFF(e2, a2, b2, c2, d2, x[0], 15);
-    FFF(d2, e2, a2, b2, c2, x[3], 13);
-    FFF(c2, d2, e2, a2, b2, 0, 11);
-    FFF(b2, c2, d2, e2, a2, 0, 11);
-
-    digest[0] = c1 + d2;
-    digest[1] = d1 + e2;
-    digest[2] = e1 + a2;
-    digest[3] = a1 + b2;
-    digest[4] = b1 + c2;
+    digest[0] = digest1[0] + digest2[0];
+    digest[1] = digest1[1] + digest2[1];
+    digest[2] = digest1[2] + digest2[2];
+    digest[3] = digest1[3] + digest2[3];
+    digest[4] = digest1[4] + digest2[4];
 }
-#endif
-#ifndef _SECP256K1_CL
-#define _SECP256K1_CL
-
-typedef ulong uint64_t;
-
-typedef struct {
-    uint v[8];
-}uint256_t;
 
+void ripemd160FinalRound(const unsigned int hIn[5], unsigned int hOut[5])
+{
+    hOut[0] = endian(hIn[0] + RIPEMD160_IV[1]);    // hOut[k] = endian(hIn[k] + RIPEMD160_IV[(k + 1) % 5])
+    hOut[1] = endian(hIn[1] + RIPEMD160_IV[2]);
+    hOut[2] = endian(hIn[2] + RIPEMD160_IV[3]);
+    hOut[3] = endian(hIn[3] + RIPEMD160_IV[4]);
+    hOut[4] = endian(hIn[4] + RIPEMD160_IV[0]);    // the IV index wraps around to 0 for the last word
+}
 
-/**
- Prime modulus 2^256 - 2^32 - 977
- */
-__constant unsigned int _P[8] = {
-    0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFC2F
-};
+#endif
+#ifndef SECP256K1_CL
+#define SECP256K1_CL
 
-__constant unsigned int _P_MINUS1[8] = {
-    0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFC2F
-};
+typedef struct uint256_t {
+    unsigned int v[8];
+} uint256_t;
 
 /**
- Base point X
+ * Base point X
  */
-__constant unsigned int _GX[8] = {
+__constant unsigned int GX[8] = {
     0x79BE667E, 0xF9DCBBAC, 0x55A06295, 0xCE870B07, 0x029BFCDB, 0x2DCE28D9, 0x59F2815B, 0x16F81798
 };
 
 /**
- Base point Y
+ * Base point Y
  */
-__constant unsigned int _GY[8] = {
+__constant unsigned int GY[8] = {
     0x483ADA77, 0x26A3C465, 0x5DA4FBFC, 0x0E1108A8, 0xFD17B448, 0xA6855419, 0x9C47D08F, 0xFB10D4B8
 };
 
-
 /**
  * Group order
  */
-__constant unsigned int _N[8] = {
+__constant unsigned int N[8] = {
     0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xBAAEDCE6, 0xAF48A03B, 0xBFD25E8C, 0xD0364141
 };
 
-__constant unsigned int _INFINITY[8] = {
-    0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
+/**
+ * Prime modulus 2^256 - 2^32 - 977
+ */
+__constant unsigned int P[8] = {
+    0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFC2F
 };
 
-void printBigInt(const unsigned int x[8])
-{
-    printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
-        x[0], x[1], x[2], x[3],
-        x[4], x[5], x[6], x[7]);
-}
-
-// Add with carry
-unsigned int addc(unsigned int a, unsigned int b, unsigned int *carry)
-{
-    unsigned int sum = a + *carry;
-
-    unsigned int c1 = (sum < a) ? 1 : 0;
-
-    sum = sum + b;
-    
-    unsigned int c2 = (sum < b) ? 1 : 0;
-
-    *carry = c1 | c2;
-
-    return sum;
-}
-
-// Subtract with borrow
-unsigned int subc(unsigned int a, unsigned int b, unsigned int *borrow)
-{
-    unsigned int diff = a - *borrow;
-
-    *borrow = (diff > a) ? 1 : 0;
-
-    unsigned int diff2 = diff - b;
-
-    *borrow |= (diff2 > diff) ? 1 : 0;
-
-    return diff2;
-}
-
 #ifdef DEVICE_VENDOR_INTEL
-
 // Intel devices have a mul_hi bug
-unsigned int mul_hi977(unsigned int x)
+inline unsigned int mul_hi977(unsigned int x)
 {
     unsigned int high = x >> 16;
     unsigned int low = x & 0xffff;
@@ -588,663 +356,673 @@ unsigned int mul_hi977(unsigned int x)
 }
 
 // 32 x 32 multiply-add
-void madd977(unsigned int *high, unsigned int *low, unsigned int a, unsigned int c)
+inline void madd977(unsigned int *high, unsigned int *low, unsigned int *a, unsigned int *c)
 {
-    *low = a * 977;
-    unsigned int tmp = *low + c;
+    *low = *a * 977;
+    unsigned int tmp = *low + *c;
     unsigned int carry = tmp < *low ? 1 : 0;
     *low = tmp;
-    *high = mul_hi977(a) + carry;
+    *high = mul_hi977(*a) + carry;
 }
-
 #else
 
-// 32 x 32 multiply-add
-void madd977(unsigned int *high, unsigned int *low, unsigned int a, unsigned int c)
+inline void madd977(unsigned int *high, unsigned int *low, unsigned int *a, unsigned int *c)
 {
-    *low = a * 977;
-    unsigned int tmp = *low + c;
+    *low = *a * 977;
+    unsigned int tmp = *low + *c;
     unsigned int carry = tmp < *low ? 1 : 0;
     *low = tmp;
-    *high = mad_hi(a, (unsigned int)977, carry);
+    *high = mad_hi(*a, (unsigned int)977, carry);
 }
 
 #endif
 
-// 32 x 32 multiply-add
-void madd(unsigned int *high, unsigned int *low, unsigned int a, unsigned int b, unsigned int c)
-{
-    *low = a * b;
-    unsigned int tmp = *low + c;
-    unsigned int carry = tmp < *low ? 1 : 0;
-    *low = tmp;
-    *high = mad_hi(a, b, carry);
-}
-
-void mull(unsigned int *high, unsigned int *low, unsigned int a, unsigned int b)
-{
-    *low = a * b;
-    *high = mul_hi(a, b);
-}
-
-
-uint256_t sub256k(uint256_t a, uint256_t b, unsigned int* borrow_ptr)
-{
-    unsigned int borrow = 0;
-    uint256_t c;
-
-    for(int i = 7; i >= 0; i--) {
-        c.v[i] = subc(a.v[i], b.v[i], &borrow);
-    }
-
-    *borrow_ptr = borrow;
-
-    return c;
-}
-
-bool greaterThanEqualToP(const unsigned int a[8])
-{
-    for(int i = 0; i < 8; i++) {
-        if(a[i] > _P_MINUS1[i]) {
-            return true;
-        } else if(a[i] < _P_MINUS1[i]) {
-            return false;
-        }
-    }
-
-    return true;
-}
+// Add with carry: sum = a + b + carry, carry = carry-out. tmp is scratch; safe when sum aliases a or b.
+#define addc(a, b, sum, carry, tmp)      \
+    tmp = (a) + (carry);                 \
+    carry = ((tmp) < (carry)) * 1;       \
+    tmp = (tmp) + (b);                   \
+    carry |= ((tmp) < (b)) * 1; sum = (tmp);
+
+// Subtract with borrow: diff = a - b - borrow; borrow becomes 1 on underflow, else 0. tmp is scratch.
+#define subc(a, b, diff, borrow, tmp)    \
+    tmp = (a) - (borrow);                \
+    borrow = ((tmp) > (a)) * 1;          \
+    diff = (tmp) - (b);                  \
+    borrow |= ((diff) > (tmp)) ? 1 : 0;
+
+#define add256k(a, b, c, carry, tmp)    /* c = a + b over 8 words, chained from index 7 up to index 0; carry is carry-in and carry-out */ \
+    addc(a[7], b[7], c[7], carry, tmp); \
+    addc(a[6], b[6], c[6], carry, tmp); \
+    addc(a[5], b[5], c[5], carry, tmp); \
+    addc(a[4], b[4], c[4], carry, tmp); \
+    addc(a[3], b[3], c[3], carry, tmp); \
+    addc(a[2], b[2], c[2], carry, tmp); \
+    addc(a[1], b[1], c[1], carry, tmp); \
+    addc(a[0], b[0], c[0], carry, tmp);
+
+#define sub256k( a, b, c, borrow, tmp)   /* c = a - b over 8 words, chained from index 7 up to index 0; borrow is borrow-in and borrow-out */ \
+    subc(a[7], b[7], c[7], borrow, tmp); \
+    subc(a[6], b[6], c[6], borrow, tmp); \
+    subc(a[5], b[5], c[5], borrow, tmp); \
+    subc(a[4], b[4], c[4], borrow, tmp); \
+    subc(a[3], b[3], c[3], borrow, tmp); \
+    subc(a[2], b[2], c[2], borrow, tmp); \
+    subc(a[1], b[1], c[1], borrow, tmp); \
+    subc(a[0], b[0], c[0], borrow, tmp);
+
+#define isInfinity256k(a)        /* true iff all eight words of a are 0xffffffff */ \
+    (                           \
+        (a[0] == 0xffffffff) && \
+        (a[1] == 0xffffffff) && \
+        (a[2] == 0xffffffff) && \
+        (a[3] == 0xffffffff) && \
+        (a[4] == 0xffffffff) && \
+        (a[5] == 0xffffffff) && \
+        (a[6] == 0xffffffff) && \
+        (a[7] == 0xffffffff)    \
+    )
+
+#define greaterOrEqualToP(a)    /* true iff a >= P (index 0 is the most significant word); relies on P[0..5] all being 0xffffffff */ \
+    ((((a)[0] & (a)[1] & (a)[2] & (a)[3] & (a)[4] & (a)[5]) == 0xffffffff) && ((a)[6] > P[6] || ((a)[6] == P[6] && (a)[7] >= P[7])))
+
+#define equal256k(a, b)   /* true iff a and b hold the same value in all eight words */ \
+    (                     \
+        (a[0] == b[0]) && \
+        (a[1] == b[1]) && \
+        (a[2] == b[2]) && \
+        (a[3] == b[3]) && \
+        (a[4] == b[4]) && \
+        (a[5] == b[5]) && \
+        (a[6] == b[6]) && \
+        (a[7] == b[7])    \
+    )
 
 void multiply256(const unsigned int x[8], const unsigned int y[8], unsigned int out_high[8], unsigned int out_low[8])
 {
-    unsigned int z[16];
-
-    unsigned int high = 0;
+    __private unsigned long product;
 
     // First round, overwrite z
-    for(int j = 7; j >= 0; j--) {
-
-        uint64_t product = (uint64_t)x[7] * y[j];
-
-        product = product + high;
-
-        z[7 + j + 1] = (unsigned int)product;
-        high = (unsigned int)(product >> 32);
-    }
-    z[7] = high;
-
-    for(int i = 6; i >= 0; i--) {
-
-        high = 0;
-
-        for(int j = 7; j >= 0; j--) {
-
-            uint64_t product = (uint64_t)x[i] * y[j];
+    product = (unsigned long)x[7] * y[7];
+    out_low[7] = (unsigned int)product;
+    
+    product = (unsigned long)x[7] * y[6] + (unsigned int)(product >> 32);
+    out_low[6] = (unsigned int)product;
+    
+    product = (unsigned long)x[7] * y[5] + (unsigned int)(product >> 32);
+    out_low[5] = (unsigned int)product;
+    
+    product = (unsigned long)x[7] * y[4] + (unsigned int)(product >> 32);
+    out_low[4] = (unsigned int)product;
+    
+    product = (unsigned long)x[7] * y[3] + (unsigned int)(product >> 32);
+    out_low[3] = (unsigned int)product;
+    
+    product = (unsigned long)x[7] * y[2] + (unsigned int)(product >> 32);
+    out_low[2] = (unsigned int)product;
+        
+    product = (unsigned long)x[7] * y[1] + (unsigned int)(product >> 32);
+    out_low[1] = (unsigned int)product;
+        
+    product = (unsigned long)x[7] * y[0] + (unsigned int)(product >> 32);
+    out_low[0] = (unsigned int)product;
+    out_high[7] = (unsigned int)(product >> 32);
+
+    product = (unsigned long)x[6] * y[7] + out_low[6];
+    out_low[6] = (unsigned int)product;
+
+    /** round 6 (x[6] row; its first step, x[6] * y[7], is immediately above) */
+    product = (unsigned long)x[6] * y[6] + out_low[5] + (product >> 32);
+    out_low[5] = (unsigned int)product;
+
+    product = (unsigned long)x[6] * y[5] + out_low[4] + (product >> 32);
+    out_low[4] = (unsigned int)product;
+
+    product = (unsigned long)x[6] * y[4] + out_low[3] + (product >> 32);
+    out_low[3] = (unsigned int)product;
+
+    product = (unsigned long)x[6] * y[3] + out_low[2] + (product >> 32);
+    out_low[2] = (unsigned int)product;
+
+    product = (unsigned long)x[6] * y[2] + out_low[1] + (product >> 32);
+    out_low[1] = (unsigned int)product;
+    
+    product = (unsigned long)x[6] * y[1] + out_low[0] + (product >> 32);
+    out_low[0] = (unsigned int)product;
+    
+    product = (unsigned long)x[6] * y[0] + out_high[7] + (product >> 32);
+    out_high[7] = (unsigned int)product;
+    out_high[6] = product >> 32;
 
-            product = product + z[i + j + 1] + high;
+    /** round 5 */
+    product = (unsigned long)x[5] * y[7] + out_low[5];
+    out_low[5] = (unsigned int)product;
 
-            z[i + j + 1] = (unsigned int)product;
+    product = (unsigned long)x[5] * y[6] + out_low[4] + (product >> 32);
+    out_low[4] = (unsigned int)product;
 
-            high = product >> 32;
-        }
+    product = (unsigned long)x[5] * y[5] + out_low[3] + (product >> 32);
+    out_low[3] = (unsigned int)product;
 
-        z[i] = high;
-    }
+    product = (unsigned long)x[5] * y[4] + out_low[2] + (product >> 32);
+    out_low[2] = (unsigned int)product;
 
-    for(int i = 0; i < 8; i++) {
-        out_high[i] = z[i];
-        out_low[i] = z[8 + i];
-    }
-}
+    product = (unsigned long)x[5] * y[3] + out_low[1] + (product >> 32);
+    out_low[1] = (unsigned int)product;
 
+    product = (unsigned long)x[5] * y[2] + out_low[0] + (product >> 32);
+    out_low[0] = (unsigned int)product;
+    
+    product = (unsigned long)x[5] * y[1] + out_high[7] + (product >> 32);
+    out_high[7] = (unsigned int)product;
+    
+    product = (unsigned long)x[5] * y[0] + out_high[6] + (product >> 32);
+    out_high[6] = (unsigned int)product;
+    out_high[5] = product >> 32;
 
-unsigned int add256(const unsigned int a[8], const unsigned int b[8], unsigned int c[8])
-{
-    unsigned int carry = 0;
-
-    for(int i = 7; i >= 0; i--) {
-        c[i] = addc(a[i], b[i], &carry);
-    }
-
-    return carry;
-}
-
-uint256_t add256k(uint256_t a, uint256_t b, unsigned int* carry_ptr)
-{
-    uint256_t c;
-    unsigned int carry = 0;
+    /** round 4 */
+    product = (unsigned long)x[4] * y[7] + out_low[4];
+    out_low[4] = (unsigned int)product;
 
-    for(int i = 7; i >= 0; i--) {
-        c.v[i] = addc(a.v[i], b.v[i], &carry);
-    }
+    product = (unsigned long)x[4] * y[6] + out_low[3] + (product >> 32);
+    out_low[3] = (unsigned int)product;
 
-    *carry_ptr = carry;
+    product = (unsigned long)x[4] * y[5] + out_low[2] + (product >> 32);
+    out_low[2] = (unsigned int)product;
 
-    return c;
-}
+    product = (unsigned long)x[4] * y[4] + out_low[1] + (product >> 32);
+    out_low[1] = (unsigned int)product;
 
-bool isInfinity(const unsigned int x[8])
-{
-    bool isf = true;
+    product = (unsigned long)x[4] * y[3] + out_low[0] + (product >> 32);
+    out_low[0] = (unsigned int)product;
 
-    for(int i = 0; i < 8; i++) {
-        if(x[i] != 0xffffffff) {
-            isf = false;
-        }
-    }
-
-    return isf;
-}
-
-bool isInfinity256k(const uint256_t x)
-{
-    bool isf = true;
+    product = (unsigned long)x[4] * y[2] + out_high[7] + (product >> 32);
+    out_high[7] = (unsigned int)product;
+    
+    product = (unsigned long)x[4] * y[1] + out_high[6] + (product >> 32);
+    out_high[6] = (unsigned int)product;
+    
+    product = (unsigned long)x[4] * y[0] + out_high[5] + (product >> 32);
+    out_high[5] = (unsigned int)product;
+    out_high[4] = product >> 32;
 
-    for(int i = 0; i < 8; i++) {
-        if(x.v[i] != 0xffffffff) {
-            isf = false;
-        }
-    }
+    /** round 3 */
+    product = (unsigned long)x[3] * y[7] + out_low[3];
+    out_low[3] = (unsigned int)product;
 
-    return isf;
-}
+    product = (unsigned long)x[3] * y[6] + out_low[2] + (product >> 32);
+    out_low[2] = (unsigned int)product;
 
-bool equal(const unsigned int a[8], const unsigned int b[8])
-{
-    for(int i = 0; i < 8; i++) {
-        if(a[i] != b[i]) {
-            return false;
-        }
-    }
+    product = (unsigned long)x[3] * y[5] + out_low[1] + (product >> 32);
+    out_low[1] = (unsigned int)product;
 
-    return true;
-}
+    product = (unsigned long)x[3] * y[4] + out_low[0] + (product >> 32);
+    out_low[0] = (unsigned int)product;
 
-bool equal256k(uint256_t a, uint256_t b)
-{
-    for(int i = 0; i < 8; i++) {
-        if(a.v[i] != b.v[i]) {
-            return false;
-        }
-    }
+    product = (unsigned long)x[3] * y[3] + out_high[7] + (product >> 32);
+    out_high[7] = (unsigned int)product;
 
-    return true;
-}
+    product = (unsigned long)x[3] * y[2] + out_high[6] + (product >> 32);
+    out_high[6] = (unsigned int)product;
+    
+    product = (unsigned long)x[3] * y[1] + out_high[5] + (product >> 32);
+    out_high[5] = (unsigned int)product;
+    
+    product = (unsigned long)x[3] * y[0] + out_high[4] + (product >> 32);
+    out_high[4] = (unsigned int)product;
+    out_high[3] = product >> 32;
 
-inline uint256_t readInt256(__global const uint256_t* ara, int idx)
-{
-    return ara[idx];
-}
+    /** round 2 */
+    product = (unsigned long)x[2] * y[7] + out_low[2];
+    out_low[2] = (unsigned int)product;
 
-/*
- * Read least-significant word
- */
-unsigned int readLSW(__global const unsigned int *ara, int idx)
-{
-    return ara[idx * 8 + 7];
-}
+    product = (unsigned long)x[2] * y[6] + out_low[1] + (product >> 32);
+    out_low[1] = (unsigned int)product;
 
-unsigned int readLSW256k(__global const uint256_t* ara, int idx)
-{
-    return ara[idx].v[7];
-}
+    product = (unsigned long)x[2] * y[5] + out_low[0] + (product >> 32);
+    out_low[0] = (unsigned int)product;
 
-unsigned int readWord256k(__global const uint256_t* ara, int idx, int word)
-{
-    return ara[idx].v[word];
-}
+    product = (unsigned long)x[2] * y[4] + out_high[7] + (product >> 32);
+    out_high[7] = (unsigned int)product;
 
-unsigned int addP(const unsigned int a[8], unsigned int c[8])
-{
-    unsigned int carry = 0;
+    product = (unsigned long)x[2] * y[3] + out_high[6] + (product >> 32);
+    out_high[6] = (unsigned int)product;
 
-    for(int i = 7; i >= 0; i--) {
-        c[i] = addc(a[i], _P[i], &carry);
-    }
+    product = (unsigned long)x[2] * y[2] + out_high[5] + (product >> 32);
+    out_high[5] = (unsigned int)product;
+    
+    product = (unsigned long)x[2] * y[1] + out_high[4] + (product >> 32);
+    out_high[4] = (unsigned int)product;
+    
+    product = (unsigned long)x[2] * y[0] + out_high[3] + (product >> 32);
+    out_high[3] = (unsigned int)product;
+    out_high[2] = product >> 32;
+    
+    /** round 1 */
+    product = (unsigned long)x[1] * y[7] + out_low[1];
+    out_low[1] = (unsigned int)product;
 
-    return carry;
-}
+    product = (unsigned long)x[1] * y[6] + out_low[0] + (product >> 32);
+    out_low[0] = (unsigned int)product;
 
-unsigned int subP(const unsigned int a[8], unsigned int c[8])
-{
-    unsigned int borrow = 0;
-    for(int i = 7; i >= 0; i--) {
-        c[i] = subc(a[i], _P[i], &borrow);
-    }
+    product = (unsigned long)x[1] * y[5] + out_high[7] + (product >> 32);
+    out_high[7] = (unsigned int)product;
 
-    return borrow;
-}
+    product = (unsigned long)x[1] * y[4] + out_high[6] + (product >> 32);
+    out_high[6] = (unsigned int)product;
 
-/**
- * Subtraction mod p
- */
-uint256_t subModP256k(uint256_t a, uint256_t b)
-{
-    unsigned int borrow = 0;
-    uint256_t c = sub256k(a, b, &borrow);
-    if(borrow) {
-        addP(c.v, c.v);
-    }
+    product = (unsigned long)x[1] * y[3] + out_high[5] + (product >> 32);
+    out_high[5] = (unsigned int)product;
 
-    return c;
-}
+    product = (unsigned long)x[1] * y[2] + out_high[4] + (product >> 32);
+    out_high[4] = (unsigned int)product;
+    
+    product = (unsigned long)x[1] * y[1] + out_high[3] + (product >> 32);
+    out_high[3] = (unsigned int)product;
+    
+    product = (unsigned long)x[1] * y[0] + out_high[2] + (product >> 32);
+    out_high[2] = (unsigned int)product;
+    out_high[1] = product >> 32;
 
+    /** round 0 */
+    product = (unsigned long)x[0] * y[7] + out_low[0];
+    out_low[0] = (unsigned int)product;
 
-uint256_t addModP256k(uint256_t a, uint256_t b)
-{
-    unsigned int carry = 0;
+    product = (unsigned long)x[0] * y[6] + out_high[7] + (product >> 32);
+    out_high[7] = (unsigned int)product;
 
-    uint256_t c = add256k(a, b, &carry);
+    product = (unsigned long)x[0] * y[5] + out_high[6] + (product >> 32);
+    out_high[6] = (unsigned int)product;
 
-    bool gt = false;
-    for(int i = 0; i < 8; i++) {
-        if(c.v[i] > _P[i]) {
-            gt = true;
-            break;
-        } else if(c.v[i] < _P[i]) {
-            break;
-        }
-    }
+    product = (unsigned long)x[0] * y[4] + out_high[5] + (product >> 32);
+    out_high[5] = (unsigned int)product;
 
-    if(carry || gt) {
-        subP(c.v, c.v);
-    }
+    product = (unsigned long)x[0] * y[3] + out_high[4] + (product >> 32);
+    out_high[4] = (unsigned int)product;
 
-    return c;
+    product = (unsigned long)x[0] * y[2] + out_high[3] + (product >> 32);
+    out_high[3] = (unsigned int)product;
+    
+    product = (unsigned long)x[0] * y[1] + out_high[2] + (product >> 32);
+    out_high[2] = (unsigned int)product;
+    
+    product = (unsigned long)x[0] * y[0] + out_high[1] + (product >> 32);
+    out_high[1] = (unsigned int)product;
+    out_high[0] = product >> 32;
 }
 
-
-void mulModP(const unsigned int a[8], const unsigned int b[8], unsigned int product_low[8])
+void mulModP(unsigned int a[8], unsigned int b[8], unsigned int product_low[8])
 {
-    unsigned int high[8];
+    __private unsigned int high[8];
+    __private unsigned int low[8];
 
-    unsigned int hWord = 0;
-    unsigned int carry = 0;
+    __private unsigned int hWord = 0;
+    __private unsigned int carry = 0;
+    __private unsigned int t = 0;
+    __private unsigned int product6 = 0;
+    __private unsigned int product7 = 0;
+    __private unsigned int tmp;
 
     // 256 x 256 multiply
-    multiply256(a, b, high, product_low);
+    multiply256(a, b, high, low);
+    product_low[7] = low[7];
+    product_low[6] = low[6];
+    product_low[5] = low[5];
+    product_low[4] = low[4];
+    product_low[3] = low[3];
+    product_low[2] = low[2];
+    product_low[1] = low[1];
+    product_low[0] = low[0];
 
     // Add 2^32 * high to the low 256 bits (shift left 1 word and add)
     // Affects product[14] to product[6]
-    for(int i = 6; i >= 0; i--) {
-        product_low[i] = addc(product_low[i], high[i + 1], &carry);
-    }
-    unsigned int product7 = addc(high[0], 0, &carry);
-    unsigned int product6 = carry;
+    addc(product_low[6], high[7], product_low[6], carry, tmp);
+    addc(product_low[5], high[6], product_low[5], carry, tmp);
+    addc(product_low[4], high[5], product_low[4], carry, tmp);
+    addc(product_low[3], high[4], product_low[3], carry, tmp);
+    addc(product_low[2], high[3], product_low[2], carry, tmp);
+    addc(product_low[1], high[2], product_low[1], carry, tmp);
+    addc(product_low[0], high[1], product_low[0], carry, tmp);
+
+    addc(high[0], 0, product7, carry, tmp);
+    product6 = carry;
 
     carry = 0;
 
     // Multiply high by 977 and add to low
     // Affects product[15] to product[5]
     for(int i = 7; i >= 0; i--) {
-        unsigned int t = 0;
-        madd977(&hWord, &t, high[i], hWord);
-        product_low[i] = addc(product_low[i], t, &carry);
+        madd977(&hWord, &t, &high[i], &hWord);
+        addc(product_low[i], t, product_low[i], carry, tmp);
+        t = 0;
     }
-    product7 = addc(product7, hWord, &carry);
-    product6 = addc(product6, 0, &carry);
+    addc(product7, hWord, high[7], carry, tmp);
+    addc(product6, 0, high[6], carry, tmp);
 
     // Multiply high 2 words by 2^32 and add to low
     // Affects product[14] to product[7]
     carry = 0;
-    high[7] = product7;
-    high[6] = product6;
 
-    product7 = 0;
-    product6 = 0;
+    addc(product_low[6], high[7], product_low[6], carry, tmp);
+    addc(product_low[5], high[6], product_low[5], carry, tmp);
 
-    product_low[6] = addc(product_low[6], high[7], &carry);
-    product_low[5] = addc(product_low[5], high[6], &carry);
-
-    // Propagate the carry
-    for(int i = 4; i >= 0; i--) {
-        product_low[i] = addc(product_low[i], 0, &carry);
-    }
-    product7 = carry;
+    addc(product_low[4], 0, product_low[4], carry, tmp);
+    addc(product_low[3], 0, product_low[3], carry, tmp);
+    addc(product_low[2], 0, product_low[2], carry, tmp);
+    addc(product_low[1], 0, product_low[1], carry, tmp);
+    addc(product_low[0], 0, product_low[0], carry, tmp);
 
     // Multiply top 2 words by 977 and add to low
     // Affects product[15] to product[7]
     carry = 0;
     hWord = 0;
-    unsigned int t = 0;
-    madd977(&hWord, &t, high[7], hWord);
-    product_low[7] = addc(product_low[7], t, &carry);
-    madd977(&hWord, &t, high[6], hWord);
-    product_low[6] = addc(product_low[6], t, &carry);
-    product_low[5] = addc(product_low[5], hWord, &carry);
-
+    madd977(&hWord, &t, &high[7], &hWord);
+    addc(product_low[7], t, product_low[7], carry, tmp);
+    madd977(&hWord, &t, &high[6], &hWord);
+    addc(product_low[6], t,  product_low[6], carry, tmp);
+    addc(product_low[5], hWord,  product_low[5], carry, tmp);
     // Propagate carry
-    for(int i = 4; i >= 0; i--) {
-        product_low[i] = addc(product_low[i], 0, &carry);
-    }
-    product7 = carry;
+    addc(product_low[4], 0, product_low[4], carry, tmp);
+    addc(product_low[3], 0, product_low[3], carry, tmp);
+    addc(product_low[2], 0, product_low[2], carry, tmp);
+    addc(product_low[1], 0, product_low[1], carry, tmp);
+    addc(product_low[0], 0, product_low[0], carry, tmp);
 
     // Reduce if >= P
-    if(product7 || greaterThanEqualToP(product_low)) {
-        subP(product_low, product_low);
+    if(carry || greaterOrEqualToP(product_low)) {
+        carry = 0;
+        sub256k(product_low, P, product_low, carry, tmp);
     }
 }
 
-uint256_t mulModP256k(uint256_t a, uint256_t b)
-{
-    uint256_t c;
-
-    mulModP(a.v, b.v, c.v);
-
-    return c;
-}
-
-
-uint256_t squareModP256k(uint256_t a)
+/**
+ * Subtraction mod p
+ */
+void subModP256k(unsigned int a[8], unsigned int b[8], unsigned int c[8])
 {
-    uint256_t b;
-    mulModP(a.v, a.v, b.v);
-
-    return b;
+    __private unsigned int borrow = 0;
+    __private unsigned int tmp;
+    
+    sub256k(a, b, c, borrow, tmp);
+    
+    if (borrow) {
+        borrow = 0;
+        add256k(c, P, c, borrow, tmp);
+    }
 }
 
-
 /**
  * Multiplicative inverse mod P using Fermat's method of x^(p-2) mod p and addition chains
  */
-uint256_t invModP256k(uint256_t value)
+void invModP256k(unsigned int x[8])
 {
-    uint256_t x = value;
-
-
-    //unsigned int y[8] = { 0, 0, 0, 0, 0, 0, 0, 1 };
-    uint256_t y = {{0, 0, 0, 0, 0, 0, 0, 1}};
-
-    // 0xd - 1101
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    //y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-
-    // 0x2 - 0010
-    //y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    //y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    //y = mulModP256k(x, y);
-    x = squareModP256k(x);
-
-    // 0xc = 0x1100
-    //y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    //y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-
-
-    // 0xfffff
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-
-
-    // 0xe - 1110
-    //y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    // 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffff
-    for(int i = 0; i < 219; i++) {
-        y = mulModP256k(x, y);
-        x = squareModP256k(x);
+    __private unsigned int y[8] = {0, 0, 0, 0, 0, 0, 0, 1};
+
+    mulModP(x, y, y);
+    mulModP(x, x, x);
+    mulModP(x, x, x);
+    mulModP(x, y, y);
+    mulModP(x, x, x);
+    mulModP(x, y, y);
+    mulModP(x, x, x);
+    mulModP(x, x, x);
+    mulModP(x, y, y);
+
+    for(int i = 0; i < 5; i++) {
+        mulModP(x, x, x);
     }
-    y = mulModP256k(x, y);
 
-    return y;
+    for(int i = 0; i < 22; i++) {
+        mulModP(x, y, y);
+        mulModP(x, x, x);
+    }
+
+    mulModP(x, x, x);
+
+    for(int i = 0; i < 222; i++) {
+        mulModP(x, y, y);
+        mulModP(x, x, x);
+    }
+
+    mulModP(x, y, x);
 }
 
+void addModP256k(const unsigned int a[8], const unsigned int b[8], unsigned int c[8])
+{
+    __private unsigned int borrow = 0;
+    __private unsigned int carry = 0;
+    __private unsigned int tmp = 0;
+    // Addition mod P: c = (a + b) mod P for a, b already below P. Index 0 is the most significant word.
+    add256k(a, b, c, carry, tmp);
+    // Subtract P exactly once when the sum overflowed 256 bits (carry set) or when c >= P.
+    if(carry) { sub256k(c, P, c, borrow, tmp); }
+    // Otherwise compare c with P word by word; equal words fall through to the next, less significant word.
+    else if(c[0] > P[0]) { sub256k(c, P, c, borrow, tmp); }
+    else if(c[0] < P[0]) {  }
+
+    else if(c[1] > P[1]) { sub256k(c, P, c, borrow, tmp); }
+    else if(c[1] < P[1]) {  }
+
+    else if(c[2] > P[2]) { sub256k(c, P, c, borrow, tmp); }
+    else if(c[2] < P[2]) {  }
+
+    else if(c[3] > P[3]) { sub256k(c, P, c, borrow, tmp); }
+    else if(c[3] < P[3]) {  }
+
+    else if(c[4] > P[4]) { sub256k(c, P, c, borrow, tmp); }
+    else if(c[4] < P[4]) {  }
+
+    else if(c[5] > P[5]) { sub256k(c, P, c, borrow, tmp); }
+    else if(c[5] < P[5]) {  }
+
+    else if(c[6] > P[6]) { sub256k(c, P, c, borrow, tmp); }
+    else if(c[6] < P[6]) {  }
+
+    else if(c[7] >= P[7]) { sub256k(c, P, c, borrow, tmp); } // >= so that c == P is reduced to 0
+}
 
-void beginBatchAdd256k(uint256_t px, uint256_t x, __global uint256_t* chain, int i, int batchIdx, uint256_t* inverse)
+void doBatchInverse256k(unsigned int x[8])
 {
-    int gid = get_local_size(0) * get_group_id(0) + get_local_id(0);
-    int dim = get_global_size(0);
+    invModP256k(x);
+}
+
+void beginBatchAdd256k(
+    const uint256_t px,
+    const uint256_t x,
+    __global uint256_t* chain,
+    const int i,
+    const int batchIdx,
+    uint256_t* inverse
+) {
+    __private int gid = get_local_size(0) * get_group_id(0) + get_local_id(0);
+    __private int dim = get_global_size(0);
+
+    __private unsigned int t[8];
 
     // x = Gx - x
-    uint256_t t = subModP256k(px, x);
+    subModP256k(px.v, x.v, t);
 
 
     // Keep a chain of multiples of the diff, i.e. c[0] = diff0, c[1] = diff0 * diff1,
     // c[2] = diff2 * diff1 * diff0, etc
-    *inverse = mulModP256k(*inverse, t);
+    mulModP(inverse->v, t, inverse->v);
 
     chain[batchIdx * dim + gid] = *inverse;
 }
 
-
-void beginBatchAddWithDouble256k(uint256_t px, uint256_t py, __global uint256_t* xPtr, __global uint256_t* chain, int i, int batchIdx, uint256_t* inverse)
-{
-    int gid = get_local_size(0) * get_group_id(0) + get_local_id(0);
-    int dim = get_global_size(0);
-
-    uint256_t x = xPtr[i];
-
-    if(equal256k(px, x)) {
-        x = addModP256k(py, py);
+void beginBatchAddWithDouble256k(
+    const uint256_t px,
+    const uint256_t py,
+    __global uint256_t* xPtr,
+    __global uint256_t* chain,
+    const int i,
+    const int batchIdx,
+    uint256_t* inverse
+) {
+    __private int gid = get_local_size(0) * get_group_id(0) + get_local_id(0);
+    __private int dim = get_global_size(0);
+    __private uint256_t x = xPtr[i];
+
+    if(equal256k(px.v, x.v)) {
+        addModP256k(py.v,py.v, x.v);
     } else {
         // x = Gx - x
-        x = subModP256k(px, x);
+        subModP256k(px.v, x.v, x.v);
     }
 
     // Keep a chain of multiples of the diff, i.e. c[0] = diff0, c[1] = diff0 * diff1,
     // c[2] = diff2 * diff1 * diff0, etc
-    *inverse = mulModP256k(x, *inverse);
+    mulModP(x.v, inverse->v, inverse->v);
 
     chain[batchIdx * dim + gid] = *inverse;
 }
 
+void completeBatchAdd256k(
+    const uint256_t px,
+    const uint256_t py,
+    __global uint256_t* xPtr,
+    __global uint256_t* yPtr,
+    const int i,
+    const int batchIdx,
+    __global uint256_t* chain,
+    uint256_t* inverse,
+    uint256_t* newX,
+    uint256_t* newY)
+{
+    __private int gid = get_local_size(0) * get_group_id(0) + get_local_id(0);  // work-item index along dimension 0
+    __private int dim = get_global_size(0);                                     // work-item count along dimension 0
+    __private uint256_t x = xPtr[i];
+    __private uint256_t y = yPtr[i];
+	
+    uint256_t s;
+    __private unsigned int tmp[8];
+    // NOTE(review): presumably the batched affine addition (px, py) + (xPtr[i], yPtr[i]) -- confirm against callers.
+    if(batchIdx != 0) {
+        uint256_t c;
+        // s = *inverse * chain[(batchIdx - 1) * dim + gid]
+        c = chain[(batchIdx - 1) * dim + gid];
+        mulModP(inverse->v, c.v, s.v);
+        // *inverse *= (px - x), updated in place for the caller
+        subModP256k(px.v, x.v, tmp);
+        mulModP(tmp, inverse->v, inverse->v);
+    } else {
+        s = *inverse;
+    }
+    // s *= (py - y)
+	subModP256k(py.v, y.v, tmp);
+
+    mulModP(tmp, s.v, s.v);
+
+    // Rx = s^2 - Gx - Qx
+    mulModP(s.v, s.v, tmp);
+
+    subModP256k(tmp, px.v, newX->v);
+    subModP256k(newX->v, x.v, newX->v);
+
+    // Ry = s(px - rx) - py
+	subModP256k(px.v, newX->v, tmp);
+    mulModP(s.v, tmp, newY->v);
+    subModP256k(newY->v, py.v, newY->v);
+}
 
 void completeBatchAddWithDouble256k(
-    uint256_t px,
-    uint256_t py,
+    const uint256_t px,
+    const uint256_t py,
     __global const uint256_t* xPtr,
     __global const uint256_t* yPtr,
-    int i,
-    int batchIdx,
+    const int i,
+    const int batchIdx,
     __global uint256_t* chain,
     uint256_t* inverse,
     uint256_t* newX,
     uint256_t* newY)
 {
-    int gid = get_local_size(0) * get_group_id(0) + get_local_id(0);
-    int dim = get_global_size(0);
-    uint256_t s;
-    uint256_t x;
-    uint256_t y;
+    __private int gid = get_local_size(0) * get_group_id(0) + get_local_id(0);
+    __private int dim = get_global_size(0);
+    __private uint256_t s;
+    __private uint256_t x;
+    __private uint256_t y;
 
     x = xPtr[i];
     y = yPtr[i];
 
     if(batchIdx >= 1) {
 
-        uint256_t c;
+        __private uint256_t c;
 
         c = chain[(batchIdx - 1) * dim + gid];
-        s = mulModP256k(*inverse, c);
+        mulModP(inverse->v, c.v, s.v);
 
         uint256_t diff;
-        if(equal256k(px, x)) {
-            diff = addModP256k(py, py);
+        if(equal256k(px.v, x.v)) {
+            addModP256k(py.v, py.v, diff.v);
         } else {
-            diff = subModP256k(px, x);
+            subModP256k(px.v, x.v, diff.v);
         }
 
-        *inverse = mulModP256k(diff, *inverse);
+        mulModP(diff.v, inverse->v, inverse->v);
     } else {
         s = *inverse;
     }
 
 
-    if(equal256k(px, x)) {
+    if(equal256k(px.v, x.v)) {
         // currently s = 1 / 2y
 
-        uint256_t x2;
-        uint256_t tx2;
-        uint256_t x3;
+        __private uint256_t x2;
+        __private uint256_t tx2;
 
         // 3x^2
-        x2 = mulModP256k(x, x);
-        tx2 = addModP256k(x2, x2);
-        tx2 = addModP256k(x2, tx2);
+        mulModP(x.v, x.v, x2.v);
+        addModP256k(x2.v, x2.v, tx2.v);
+        addModP256k(x2.v, tx2.v, tx2.v);
 
         // s = 3x^2 * 1/2y
-        s = mulModP256k(tx2, s);
+        mulModP(tx2.v, s.v, s.v);
 
         // s^2
-        uint256_t s2;
-        s2 = mulModP256k(s, s);
+        __private uint256_t s2;
+        mulModP(s.v, s.v, s2.v);
 
         // Rx = s^2 - 2px
-        *newX = subModP256k(s2, x);
-        *newX = subModP256k(*newX, x);
+        subModP256k(s2.v, x.v, newX->v);
+        subModP256k(newX->v, x.v, newX->v);
 
         // Ry = s(px - rx) - py
-        uint256_t k;
-        k = subModP256k(px, *newX);
-        *newY = mulModP256k(s, k);
-        *newY = subModP256k(*newY, py);
+        __private uint256_t k;
+				subModP256k(px.v, newX->v, k.v);
+        mulModP(s.v, k.v, newY->v);
+        subModP256k(newY->v, py.v,newY->v);
     } else {
 
-        uint256_t rise;
-        rise = subModP256k(py, y);
+        __private uint256_t rise;
+        subModP256k(py.v, y.v, rise.v);
 
-        s = mulModP256k(rise, s);
+        mulModP(rise.v, s.v, s.v);
 
         // Rx = s^2 - Gx - Qx
-        uint256_t s2;
-        s2 = mulModP256k(s, s);
+        __private uint256_t s2;
+        mulModP(s.v, s.v, s2.v);
 
-        *newX = subModP256k(s2, px);
-        *newX = subModP256k(*newX, x);
+        subModP256k(s2.v, px.v, newX->v);
+        subModP256k(newX->v, x.v,newX->v);
 
         // Ry = s(px - rx) - py
-        uint256_t k;
-        k = subModP256k(px, *newX);
-        *newY = mulModP256k(s, k);
-        *newY = subModP256k(*newY, py);
+        __private uint256_t k;
+        subModP256k(px.v, newX->v, k.v);
+        mulModP(s.v, k.v, newY->v);
+        subModP256k(newY->v, py.v, newY->v);
     }
 }
 
-
-void completeBatchAdd256k(
-    uint256_t px,
-    uint256_t py,
-    __global uint256_t* xPtr,
-    __global uint256_t* yPtr,
-    int i,
-    int batchIdx,
-    __global uint256_t* chain,
-    uint256_t* inverse,
-    uint256_t* newX,
-    uint256_t* newY)
+unsigned int readWord256k(__global const uint256_t* ara, const int idx, const int word)
 {
-    int gid = get_local_size(0) * get_group_id(0) + get_local_id(0);
-    int dim = get_global_size(0);
-
-    uint256_t s;
-    uint256_t x;
-
-    x = xPtr[i];
-
-    if(batchIdx >= 1) {
-        uint256_t c;
-
-        c = chain[(batchIdx - 1) * dim + gid];
-        s = mulModP256k(*inverse, c);
-
-        uint256_t diff;
-        diff = subModP256k(px, x);
-        *inverse = mulModP256k(diff, *inverse);
-    } else {
-        s = *inverse;
-    }
-
-    uint256_t y;
-    y = yPtr[i];
-
-    uint256_t rise;
-    rise = subModP256k(py, y);
-
-    s = mulModP256k(rise, s);
-
-    // Rx = s^2 - Gx - Qx
-    uint256_t s2;
-    s2 = mulModP256k(s, s);
-
-    *newX = subModP256k(s2, px);
-    *newX = subModP256k(*newX, x);
-
-    // Ry = s(px - rx) - py
-    uint256_t k;
-    k = subModP256k(px, *newX);
-    *newY = mulModP256k(s, k);
-    *newY = subModP256k(*newY, py);
-}
-
-
-uint256_t doBatchInverse256k(uint256_t x)
-{
-    return invModP256k(x);
+    return ara[idx].v[word];
 }
 
 #endif
-#ifndef _SHA256_CL
-#define _SHA256_CL
-
+#ifndef SHA256_CL
+#define SHA256_CL
 
 __constant unsigned int _K[64] = {
     0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
@@ -1270,7 +1048,6 @@ __constant unsigned int _IV[8] = {
 
 #define rotr(x, n) ((x) >> (n)) ^ ((x) << (32 - (n)))
 
-
 #define MAJ(a, b, c) (((a) & (b)) ^ ((a) & (c)) ^ ((b) & (c)))
 
 #define CH(e, f, g) (((e) & (f)) ^ (~(e) & (g)))
@@ -1279,17 +1056,25 @@ __constant unsigned int _IV[8] = {
 
 #define s1(x) (rotr((x), 17) ^ rotr((x), 19) ^ ((x) >> 10))
 
-#define round(a, b, c, d, e, f, g, h, m, k)\
+#define roundSha(a, b, c, d, e, f, g, h, m, k)\
     t = CH((e), (f), (g)) + (rotr((e), 6) ^ rotr((e), 11) ^ rotr((e), 25)) + (k) + (m);\
     (d) += (t) + (h);\
     (h) += (t) + MAJ((a), (b), (c)) + (rotr((a), 2) ^ rotr((a), 13) ^ rotr((a), 22))
 
-
 void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned int digest[8])
 {
-    unsigned int a, b, c, d, e, f, g, h;
-    unsigned int w[16];
-    unsigned int t;
+    __private unsigned int a, b, c, d, e, f, g, h;
+    __private unsigned int w[16];
+    __private unsigned int t;
+
+    a = _IV[0];
+    b = _IV[1];
+    c = _IV[2];
+    d = _IV[3];
+    e = _IV[4];
+    f = _IV[5];
+    g = _IV[6];
+    h = _IV[7];
 
     // 0x04 || x || y
     w[0] = (x[0] >> 8) | 0x04000000;
@@ -1309,31 +1094,22 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned
     w[14] = (y[6] >> 8) | (y[5] << 24);
     w[15] = (y[7] >> 8) | (y[6] << 24);
 
-    a = _IV[0];
-    b = _IV[1];
-    c = _IV[2];
-    d = _IV[3];
-    e = _IV[4];
-    f = _IV[5];
-    g = _IV[6];
-    h = _IV[7];
-
-    round(a, b, c, d, e, f, g, h, w[0], _K[0]);
-    round(h, a, b, c, d, e, f, g, w[1], _K[1]);
-    round(g, h, a, b, c, d, e, f, w[2], _K[2]);
-    round(f, g, h, a, b, c, d, e, w[3], _K[3]);
-    round(e, f, g, h, a, b, c, d, w[4], _K[4]);
-    round(d, e, f, g, h, a, b, c, w[5], _K[5]);
-    round(c, d, e, f, g, h, a, b, w[6], _K[6]);
-    round(b, c, d, e, f, g, h, a, w[7], _K[7]);
-    round(a, b, c, d, e, f, g, h, w[8], _K[8]);
-    round(h, a, b, c, d, e, f, g, w[9], _K[9]);
-    round(g, h, a, b, c, d, e, f, w[10], _K[10]);
-    round(f, g, h, a, b, c, d, e, w[11], _K[11]);
-    round(e, f, g, h, a, b, c, d, w[12], _K[12]);
-    round(d, e, f, g, h, a, b, c, w[13], _K[13]);
-    round(c, d, e, f, g, h, a, b, w[14], _K[14]);
-    round(b, c, d, e, f, g, h, a, w[15], _K[15]);
+    roundSha(a, b, c, d, e, f, g, h, w[0], _K[0]);
+    roundSha(h, a, b, c, d, e, f, g, w[1], _K[1]);
+    roundSha(g, h, a, b, c, d, e, f, w[2], _K[2]);
+    roundSha(f, g, h, a, b, c, d, e, w[3], _K[3]);
+    roundSha(e, f, g, h, a, b, c, d, w[4], _K[4]);
+    roundSha(d, e, f, g, h, a, b, c, w[5], _K[5]);
+    roundSha(c, d, e, f, g, h, a, b, w[6], _K[6]);
+    roundSha(b, c, d, e, f, g, h, a, w[7], _K[7]);
+    roundSha(a, b, c, d, e, f, g, h, w[8], _K[8]);
+    roundSha(h, a, b, c, d, e, f, g, w[9], _K[9]);
+    roundSha(g, h, a, b, c, d, e, f, w[10], _K[10]);
+    roundSha(f, g, h, a, b, c, d, e, w[11], _K[11]);
+    roundSha(e, f, g, h, a, b, c, d, w[12], _K[12]);
+    roundSha(d, e, f, g, h, a, b, c, w[13], _K[13]);
+    roundSha(c, d, e, f, g, h, a, b, w[14], _K[14]);
+    roundSha(b, c, d, e, f, g, h, a, w[15], _K[15]);
 
     w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
     w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]);
@@ -1352,22 +1128,22 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned
     w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
     w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
 
-    round(a, b, c, d, e, f, g, h, w[0], _K[16]);
-    round(h, a, b, c, d, e, f, g, w[1], _K[17]);
-    round(g, h, a, b, c, d, e, f, w[2], _K[18]);
-    round(f, g, h, a, b, c, d, e, w[3], _K[19]);
-    round(e, f, g, h, a, b, c, d, w[4], _K[20]);
-    round(d, e, f, g, h, a, b, c, w[5], _K[21]);
-    round(c, d, e, f, g, h, a, b, w[6], _K[22]);
-    round(b, c, d, e, f, g, h, a, w[7], _K[23]);
-    round(a, b, c, d, e, f, g, h, w[8], _K[24]);
-    round(h, a, b, c, d, e, f, g, w[9], _K[25]);
-    round(g, h, a, b, c, d, e, f, w[10], _K[26]);
-    round(f, g, h, a, b, c, d, e, w[11], _K[27]);
-    round(e, f, g, h, a, b, c, d, w[12], _K[28]);
-    round(d, e, f, g, h, a, b, c, w[13], _K[29]);
-    round(c, d, e, f, g, h, a, b, w[14], _K[30]);
-    round(b, c, d, e, f, g, h, a, w[15], _K[31]);
+    roundSha(a, b, c, d, e, f, g, h, w[0], _K[16]);
+    roundSha(h, a, b, c, d, e, f, g, w[1], _K[17]);
+    roundSha(g, h, a, b, c, d, e, f, w[2], _K[18]);
+    roundSha(f, g, h, a, b, c, d, e, w[3], _K[19]);
+    roundSha(e, f, g, h, a, b, c, d, w[4], _K[20]);
+    roundSha(d, e, f, g, h, a, b, c, w[5], _K[21]);
+    roundSha(c, d, e, f, g, h, a, b, w[6], _K[22]);
+    roundSha(b, c, d, e, f, g, h, a, w[7], _K[23]);
+    roundSha(a, b, c, d, e, f, g, h, w[8], _K[24]);
+    roundSha(h, a, b, c, d, e, f, g, w[9], _K[25]);
+    roundSha(g, h, a, b, c, d, e, f, w[10], _K[26]);
+    roundSha(f, g, h, a, b, c, d, e, w[11], _K[27]);
+    roundSha(e, f, g, h, a, b, c, d, w[12], _K[28]);
+    roundSha(d, e, f, g, h, a, b, c, w[13], _K[29]);
+    roundSha(c, d, e, f, g, h, a, b, w[14], _K[30]);
+    roundSha(b, c, d, e, f, g, h, a, w[15], _K[31]);
 
     w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
     w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]);
@@ -1386,22 +1162,22 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned
     w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
     w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
 
-    round(a, b, c, d, e, f, g, h, w[0], _K[32]);
-    round(h, a, b, c, d, e, f, g, w[1], _K[33]);
-    round(g, h, a, b, c, d, e, f, w[2], _K[34]);
-    round(f, g, h, a, b, c, d, e, w[3], _K[35]);
-    round(e, f, g, h, a, b, c, d, w[4], _K[36]);
-    round(d, e, f, g, h, a, b, c, w[5], _K[37]);
-    round(c, d, e, f, g, h, a, b, w[6], _K[38]);
-    round(b, c, d, e, f, g, h, a, w[7], _K[39]);
-    round(a, b, c, d, e, f, g, h, w[8], _K[40]);
-    round(h, a, b, c, d, e, f, g, w[9], _K[41]);
-    round(g, h, a, b, c, d, e, f, w[10], _K[42]);
-    round(f, g, h, a, b, c, d, e, w[11], _K[43]);
-    round(e, f, g, h, a, b, c, d, w[12], _K[44]);
-    round(d, e, f, g, h, a, b, c, w[13], _K[45]);
-    round(c, d, e, f, g, h, a, b, w[14], _K[46]);
-    round(b, c, d, e, f, g, h, a, w[15], _K[47]);
+    roundSha(a, b, c, d, e, f, g, h, w[0], _K[32]);
+    roundSha(h, a, b, c, d, e, f, g, w[1], _K[33]);
+    roundSha(g, h, a, b, c, d, e, f, w[2], _K[34]);
+    roundSha(f, g, h, a, b, c, d, e, w[3], _K[35]);
+    roundSha(e, f, g, h, a, b, c, d, w[4], _K[36]);
+    roundSha(d, e, f, g, h, a, b, c, w[5], _K[37]);
+    roundSha(c, d, e, f, g, h, a, b, w[6], _K[38]);
+    roundSha(b, c, d, e, f, g, h, a, w[7], _K[39]);
+    roundSha(a, b, c, d, e, f, g, h, w[8], _K[40]);
+    roundSha(h, a, b, c, d, e, f, g, w[9], _K[41]);
+    roundSha(g, h, a, b, c, d, e, f, w[10], _K[42]);
+    roundSha(f, g, h, a, b, c, d, e, w[11], _K[43]);
+    roundSha(e, f, g, h, a, b, c, d, w[12], _K[44]);
+    roundSha(d, e, f, g, h, a, b, c, w[13], _K[45]);
+    roundSha(c, d, e, f, g, h, a, b, w[14], _K[46]);
+    roundSha(b, c, d, e, f, g, h, a, w[15], _K[47]);
 
     w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
     w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]);
@@ -1420,22 +1196,22 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned
     w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
     w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
 
-    round(a, b, c, d, e, f, g, h, w[0], _K[48]);
-    round(h, a, b, c, d, e, f, g, w[1], _K[49]);
-    round(g, h, a, b, c, d, e, f, w[2], _K[50]);
-    round(f, g, h, a, b, c, d, e, w[3], _K[51]);
-    round(e, f, g, h, a, b, c, d, w[4], _K[52]);
-    round(d, e, f, g, h, a, b, c, w[5], _K[53]);
-    round(c, d, e, f, g, h, a, b, w[6], _K[54]);
-    round(b, c, d, e, f, g, h, a, w[7], _K[55]);
-    round(a, b, c, d, e, f, g, h, w[8], _K[56]);
-    round(h, a, b, c, d, e, f, g, w[9], _K[57]);
-    round(g, h, a, b, c, d, e, f, w[10], _K[58]);
-    round(f, g, h, a, b, c, d, e, w[11], _K[59]);
-    round(e, f, g, h, a, b, c, d, w[12], _K[60]);
-    round(d, e, f, g, h, a, b, c, w[13], _K[61]);
-    round(c, d, e, f, g, h, a, b, w[14], _K[62]);
-    round(b, c, d, e, f, g, h, a, w[15], _K[63]);
+    roundSha(a, b, c, d, e, f, g, h, w[0], _K[48]);
+    roundSha(h, a, b, c, d, e, f, g, w[1], _K[49]);
+    roundSha(g, h, a, b, c, d, e, f, w[2], _K[50]);
+    roundSha(f, g, h, a, b, c, d, e, w[3], _K[51]);
+    roundSha(e, f, g, h, a, b, c, d, w[4], _K[52]);
+    roundSha(d, e, f, g, h, a, b, c, w[5], _K[53]);
+    roundSha(c, d, e, f, g, h, a, b, w[6], _K[54]);
+    roundSha(b, c, d, e, f, g, h, a, w[7], _K[55]);
+    roundSha(a, b, c, d, e, f, g, h, w[8], _K[56]);
+    roundSha(h, a, b, c, d, e, f, g, w[9], _K[57]);
+    roundSha(g, h, a, b, c, d, e, f, w[10], _K[58]);
+    roundSha(f, g, h, a, b, c, d, e, w[11], _K[59]);
+    roundSha(e, f, g, h, a, b, c, d, w[12], _K[60]);
+    roundSha(d, e, f, g, h, a, b, c, w[13], _K[61]);
+    roundSha(c, d, e, f, g, h, a, b, w[14], _K[62]);
+    roundSha(b, c, d, e, f, g, h, a, w[15], _K[63]);
 
     a += _IV[0];
     b += _IV[1];
@@ -1447,35 +1223,34 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned
     h += _IV[7];
 
     // store the intermediate hash value
-    unsigned int tmp[8];
-    tmp[0] = a;
-    tmp[1] = b;
-    tmp[2] = c;
-    tmp[3] = d;
-    tmp[4] = e;
-    tmp[5] = f;
-    tmp[6] = g;
-    tmp[7] = h;
+    digest[0] = a;
+    digest[1] = b;
+    digest[2] = c;
+    digest[3] = d;
+    digest[4] = e;
+    digest[5] = f;
+    digest[6] = g;
+    digest[7] = h;
 
     w[0] = (y[7] << 24) | 0x00800000;
-    w[15] = 65 * 8;
-
-    round(a, b, c, d, e, f, g, h, w[0], _K[0]);
-    round(h, a, b, c, d, e, f, g, 0, _K[1]);
-    round(g, h, a, b, c, d, e, f, 0, _K[2]);
-    round(f, g, h, a, b, c, d, e, 0, _K[3]);
-    round(e, f, g, h, a, b, c, d, 0, _K[4]);
-    round(d, e, f, g, h, a, b, c, 0, _K[5]);
-    round(c, d, e, f, g, h, a, b, 0, _K[6]);
-    round(b, c, d, e, f, g, h, a, 0, _K[7]);
-    round(a, b, c, d, e, f, g, h, 0, _K[8]);
-    round(h, a, b, c, d, e, f, g, 0, _K[9]);
-    round(g, h, a, b, c, d, e, f, 0, _K[10]);
-    round(f, g, h, a, b, c, d, e, 0, _K[11]);
-    round(e, f, g, h, a, b, c, d, 0, _K[12]);
-    round(d, e, f, g, h, a, b, c, 0, _K[13]);
-    round(c, d, e, f, g, h, a, b, 0, _K[14]);
-    round(b, c, d, e, f, g, h, a, w[15], _K[15]);
+    w[15] = 520; // 65 * 8
+
+    roundSha(a, b, c, d, e, f, g, h, w[0], _K[0]);
+    roundSha(h, a, b, c, d, e, f, g, 0, _K[1]);
+    roundSha(g, h, a, b, c, d, e, f, 0, _K[2]);
+    roundSha(f, g, h, a, b, c, d, e, 0, _K[3]);
+    roundSha(e, f, g, h, a, b, c, d, 0, _K[4]);
+    roundSha(d, e, f, g, h, a, b, c, 0, _K[5]);
+    roundSha(c, d, e, f, g, h, a, b, 0, _K[6]);
+    roundSha(b, c, d, e, f, g, h, a, 0, _K[7]);
+    roundSha(a, b, c, d, e, f, g, h, 0, _K[8]);
+    roundSha(h, a, b, c, d, e, f, g, 0, _K[9]);
+    roundSha(g, h, a, b, c, d, e, f, 0, _K[10]);
+    roundSha(f, g, h, a, b, c, d, e, 0, _K[11]);
+    roundSha(e, f, g, h, a, b, c, d, 0, _K[12]);
+    roundSha(d, e, f, g, h, a, b, c, 0, _K[13]);
+    roundSha(c, d, e, f, g, h, a, b, 0, _K[14]);
+    roundSha(b, c, d, e, f, g, h, a, w[15], _K[15]);
 
     w[0] = w[0] + s0(0) + 0 + s1(0);
     w[1] = 0 + s0(0) + 0 + s1(w[15]);
@@ -1494,22 +1269,22 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned
     w[14] = 0 + s0(w[15]) + w[7] + s1(w[12]);
     w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
 
-    round(a, b, c, d, e, f, g, h, w[0], _K[16]);
-    round(h, a, b, c, d, e, f, g, w[1], _K[17]);
-    round(g, h, a, b, c, d, e, f, w[2], _K[18]);
-    round(f, g, h, a, b, c, d, e, w[3], _K[19]);
-    round(e, f, g, h, a, b, c, d, w[4], _K[20]);
-    round(d, e, f, g, h, a, b, c, w[5], _K[21]);
-    round(c, d, e, f, g, h, a, b, w[6], _K[22]);
-    round(b, c, d, e, f, g, h, a, w[7], _K[23]);
-    round(a, b, c, d, e, f, g, h, w[8], _K[24]);
-    round(h, a, b, c, d, e, f, g, w[9], _K[25]);
-    round(g, h, a, b, c, d, e, f, w[10], _K[26]);
-    round(f, g, h, a, b, c, d, e, w[11], _K[27]);
-    round(e, f, g, h, a, b, c, d, w[12], _K[28]);
-    round(d, e, f, g, h, a, b, c, w[13], _K[29]);
-    round(c, d, e, f, g, h, a, b, w[14], _K[30]);
-    round(b, c, d, e, f, g, h, a, w[15], _K[31]);
+    roundSha(a, b, c, d, e, f, g, h, w[0], _K[16]);
+    roundSha(h, a, b, c, d, e, f, g, w[1], _K[17]);
+    roundSha(g, h, a, b, c, d, e, f, w[2], _K[18]);
+    roundSha(f, g, h, a, b, c, d, e, w[3], _K[19]);
+    roundSha(e, f, g, h, a, b, c, d, w[4], _K[20]);
+    roundSha(d, e, f, g, h, a, b, c, w[5], _K[21]);
+    roundSha(c, d, e, f, g, h, a, b, w[6], _K[22]);
+    roundSha(b, c, d, e, f, g, h, a, w[7], _K[23]);
+    roundSha(a, b, c, d, e, f, g, h, w[8], _K[24]);
+    roundSha(h, a, b, c, d, e, f, g, w[9], _K[25]);
+    roundSha(g, h, a, b, c, d, e, f, w[10], _K[26]);
+    roundSha(f, g, h, a, b, c, d, e, w[11], _K[27]);
+    roundSha(e, f, g, h, a, b, c, d, w[12], _K[28]);
+    roundSha(d, e, f, g, h, a, b, c, w[13], _K[29]);
+    roundSha(c, d, e, f, g, h, a, b, w[14], _K[30]);
+    roundSha(b, c, d, e, f, g, h, a, w[15], _K[31]);
 
     w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
     w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]);
@@ -1528,22 +1303,22 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned
     w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
     w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
 
-    round(a, b, c, d, e, f, g, h, w[0], _K[32]);
-    round(h, a, b, c, d, e, f, g, w[1], _K[33]);
-    round(g, h, a, b, c, d, e, f, w[2], _K[34]);
-    round(f, g, h, a, b, c, d, e, w[3], _K[35]);
-    round(e, f, g, h, a, b, c, d, w[4], _K[36]);
-    round(d, e, f, g, h, a, b, c, w[5], _K[37]);
-    round(c, d, e, f, g, h, a, b, w[6], _K[38]);
-    round(b, c, d, e, f, g, h, a, w[7], _K[39]);
-    round(a, b, c, d, e, f, g, h, w[8], _K[40]);
-    round(h, a, b, c, d, e, f, g, w[9], _K[41]);
-    round(g, h, a, b, c, d, e, f, w[10], _K[42]);
-    round(f, g, h, a, b, c, d, e, w[11], _K[43]);
-    round(e, f, g, h, a, b, c, d, w[12], _K[44]);
-    round(d, e, f, g, h, a, b, c, w[13], _K[45]);
-    round(c, d, e, f, g, h, a, b, w[14], _K[46]);
-    round(b, c, d, e, f, g, h, a, w[15], _K[47]);
+    roundSha(a, b, c, d, e, f, g, h, w[0], _K[32]);
+    roundSha(h, a, b, c, d, e, f, g, w[1], _K[33]);
+    roundSha(g, h, a, b, c, d, e, f, w[2], _K[34]);
+    roundSha(f, g, h, a, b, c, d, e, w[3], _K[35]);
+    roundSha(e, f, g, h, a, b, c, d, w[4], _K[36]);
+    roundSha(d, e, f, g, h, a, b, c, w[5], _K[37]);
+    roundSha(c, d, e, f, g, h, a, b, w[6], _K[38]);
+    roundSha(b, c, d, e, f, g, h, a, w[7], _K[39]);
+    roundSha(a, b, c, d, e, f, g, h, w[8], _K[40]);
+    roundSha(h, a, b, c, d, e, f, g, w[9], _K[41]);
+    roundSha(g, h, a, b, c, d, e, f, w[10], _K[42]);
+    roundSha(f, g, h, a, b, c, d, e, w[11], _K[43]);
+    roundSha(e, f, g, h, a, b, c, d, w[12], _K[44]);
+    roundSha(d, e, f, g, h, a, b, c, w[13], _K[45]);
+    roundSha(c, d, e, f, g, h, a, b, w[14], _K[46]);
+    roundSha(b, c, d, e, f, g, h, a, w[15], _K[47]);
 
     w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
     w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]);
@@ -1562,38 +1337,38 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned
     w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
     w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
 
-    round(a, b, c, d, e, f, g, h, w[0], _K[48]);
-    round(h, a, b, c, d, e, f, g, w[1], _K[49]);
-    round(g, h, a, b, c, d, e, f, w[2], _K[50]);
-    round(f, g, h, a, b, c, d, e, w[3], _K[51]);
-    round(e, f, g, h, a, b, c, d, w[4], _K[52]);
-    round(d, e, f, g, h, a, b, c, w[5], _K[53]);
-    round(c, d, e, f, g, h, a, b, w[6], _K[54]);
-    round(b, c, d, e, f, g, h, a, w[7], _K[55]);
-    round(a, b, c, d, e, f, g, h, w[8], _K[56]);
-    round(h, a, b, c, d, e, f, g, w[9], _K[57]);
-    round(g, h, a, b, c, d, e, f, w[10], _K[58]);
-    round(f, g, h, a, b, c, d, e, w[11], _K[59]);
-    round(e, f, g, h, a, b, c, d, w[12], _K[60]);
-    round(d, e, f, g, h, a, b, c, w[13], _K[61]);
-    round(c, d, e, f, g, h, a, b, w[14], _K[62]);
-    round(b, c, d, e, f, g, h, a, w[15], _K[63]);
-
-    digest[0] = tmp[0] + a;
-    digest[1] = tmp[1] + b;
-    digest[2] = tmp[2] + c;
-    digest[3] = tmp[3] + d;
-    digest[4] = tmp[4] + e;
-    digest[5] = tmp[5] + f;
-    digest[6] = tmp[6] + g;
-    digest[7] = tmp[7] + h;
+    roundSha(a, b, c, d, e, f, g, h, w[0], _K[48]);
+    roundSha(h, a, b, c, d, e, f, g, w[1], _K[49]);
+    roundSha(g, h, a, b, c, d, e, f, w[2], _K[50]);
+    roundSha(f, g, h, a, b, c, d, e, w[3], _K[51]);
+    roundSha(e, f, g, h, a, b, c, d, w[4], _K[52]);
+    roundSha(d, e, f, g, h, a, b, c, w[5], _K[53]);
+    roundSha(c, d, e, f, g, h, a, b, w[6], _K[54]);
+    roundSha(b, c, d, e, f, g, h, a, w[7], _K[55]);
+    roundSha(a, b, c, d, e, f, g, h, w[8], _K[56]);
+    roundSha(h, a, b, c, d, e, f, g, w[9], _K[57]);
+    roundSha(g, h, a, b, c, d, e, f, w[10], _K[58]);
+    roundSha(f, g, h, a, b, c, d, e, w[11], _K[59]);
+    roundSha(e, f, g, h, a, b, c, d, w[12], _K[60]);
+    roundSha(d, e, f, g, h, a, b, c, w[13], _K[61]);
+    roundSha(c, d, e, f, g, h, a, b, w[14], _K[62]);
+    roundSha(b, c, d, e, f, g, h, a, w[15], _K[63]);
+
+    digest[0] += a;
+    digest[1] += b;
+    digest[2] += c;
+    digest[3] += d;
+    digest[4] += e;
+    digest[5] += f;
+    digest[6] += g;
+    digest[7] += h;
 }
 
 void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, unsigned int digest[8])
 {
-    unsigned int a, b, c, d, e, f, g, h;
-    unsigned int w[16];
-    unsigned int t;
+    __private unsigned int a, b, c, d, e, f, g, h;
+    __private unsigned int w[16];
+    __private unsigned int t;
 
     // 0x03 || x  or  0x02 || x
     w[0] = 0x02000000 | ((yParity & 1) << 24) | (x[0] >> 8);
@@ -1606,7 +1381,7 @@ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, un
     w[6] = (x[6] >> 8) | (x[5] << 24);
     w[7] = (x[7] >> 8) | (x[6] << 24);
     w[8] = (x[7] << 24) | 0x00800000;
-    w[15] = 33 * 8;
+    w[15] = 264; // 33 * 8
 
     a = _IV[0];
     b = _IV[1];
@@ -1617,22 +1392,22 @@ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, un
     g = _IV[6];
     h = _IV[7];
 
-    round(a, b, c, d, e, f, g, h, w[0], _K[0]);
-    round(h, a, b, c, d, e, f, g, w[1], _K[1]);
-    round(g, h, a, b, c, d, e, f, w[2], _K[2]);
-    round(f, g, h, a, b, c, d, e, w[3], _K[3]);
-    round(e, f, g, h, a, b, c, d, w[4], _K[4]);
-    round(d, e, f, g, h, a, b, c, w[5], _K[5]);
-    round(c, d, e, f, g, h, a, b, w[6], _K[6]);
-    round(b, c, d, e, f, g, h, a, w[7], _K[7]);
-    round(a, b, c, d, e, f, g, h, w[8], _K[8]);
-    round(h, a, b, c, d, e, f, g, 0, _K[9]);
-    round(g, h, a, b, c, d, e, f, 0, _K[10]);
-    round(f, g, h, a, b, c, d, e, 0, _K[11]);
-    round(e, f, g, h, a, b, c, d, 0, _K[12]);
-    round(d, e, f, g, h, a, b, c, 0, _K[13]);
-    round(c, d, e, f, g, h, a, b, 0, _K[14]);
-    round(b, c, d, e, f, g, h, a, w[15], _K[15]);
+    roundSha(a, b, c, d, e, f, g, h, w[0], _K[0]);
+    roundSha(h, a, b, c, d, e, f, g, w[1], _K[1]);
+    roundSha(g, h, a, b, c, d, e, f, w[2], _K[2]);
+    roundSha(f, g, h, a, b, c, d, e, w[3], _K[3]);
+    roundSha(e, f, g, h, a, b, c, d, w[4], _K[4]);
+    roundSha(d, e, f, g, h, a, b, c, w[5], _K[5]);
+    roundSha(c, d, e, f, g, h, a, b, w[6], _K[6]);
+    roundSha(b, c, d, e, f, g, h, a, w[7], _K[7]);
+    roundSha(a, b, c, d, e, f, g, h, w[8], _K[8]);
+    roundSha(h, a, b, c, d, e, f, g, 0, _K[9]);
+    roundSha(g, h, a, b, c, d, e, f, 0, _K[10]);
+    roundSha(f, g, h, a, b, c, d, e, 0, _K[11]);
+    roundSha(e, f, g, h, a, b, c, d, 0, _K[12]);
+    roundSha(d, e, f, g, h, a, b, c, 0, _K[13]);
+    roundSha(c, d, e, f, g, h, a, b, 0, _K[14]);
+    roundSha(b, c, d, e, f, g, h, a, w[15], _K[15]);
 
     w[0] = w[0] + s0(w[1]) + 0 + s1(0);
     w[1] = w[1] + s0(w[2]) + 0 + s1(w[15]);
@@ -1651,22 +1426,22 @@ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, un
     w[14] = 0 + s0(w[15]) + w[7] + s1(w[12]);
     w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
 
-    round(a, b, c, d, e, f, g, h, w[0], _K[16]);
-    round(h, a, b, c, d, e, f, g, w[1], _K[17]);
-    round(g, h, a, b, c, d, e, f, w[2], _K[18]);
-    round(f, g, h, a, b, c, d, e, w[3], _K[19]);
-    round(e, f, g, h, a, b, c, d, w[4], _K[20]);
-    round(d, e, f, g, h, a, b, c, w[5], _K[21]);
-    round(c, d, e, f, g, h, a, b, w[6], _K[22]);
-    round(b, c, d, e, f, g, h, a, w[7], _K[23]);
-    round(a, b, c, d, e, f, g, h, w[8], _K[24]);
-    round(h, a, b, c, d, e, f, g, w[9], _K[25]);
-    round(g, h, a, b, c, d, e, f, w[10], _K[26]);
-    round(f, g, h, a, b, c, d, e, w[11], _K[27]);
-    round(e, f, g, h, a, b, c, d, w[12], _K[28]);
-    round(d, e, f, g, h, a, b, c, w[13], _K[29]);
-    round(c, d, e, f, g, h, a, b, w[14], _K[30]);
-    round(b, c, d, e, f, g, h, a, w[15], _K[31]);
+    roundSha(a, b, c, d, e, f, g, h, w[0], _K[16]);
+    roundSha(h, a, b, c, d, e, f, g, w[1], _K[17]);
+    roundSha(g, h, a, b, c, d, e, f, w[2], _K[18]);
+    roundSha(f, g, h, a, b, c, d, e, w[3], _K[19]);
+    roundSha(e, f, g, h, a, b, c, d, w[4], _K[20]);
+    roundSha(d, e, f, g, h, a, b, c, w[5], _K[21]);
+    roundSha(c, d, e, f, g, h, a, b, w[6], _K[22]);
+    roundSha(b, c, d, e, f, g, h, a, w[7], _K[23]);
+    roundSha(a, b, c, d, e, f, g, h, w[8], _K[24]);
+    roundSha(h, a, b, c, d, e, f, g, w[9], _K[25]);
+    roundSha(g, h, a, b, c, d, e, f, w[10], _K[26]);
+    roundSha(f, g, h, a, b, c, d, e, w[11], _K[27]);
+    roundSha(e, f, g, h, a, b, c, d, w[12], _K[28]);
+    roundSha(d, e, f, g, h, a, b, c, w[13], _K[29]);
+    roundSha(c, d, e, f, g, h, a, b, w[14], _K[30]);
+    roundSha(b, c, d, e, f, g, h, a, w[15], _K[31]);
 
     w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
     w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]);
@@ -1685,22 +1460,22 @@ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, un
     w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
     w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
 
-    round(a, b, c, d, e, f, g, h, w[0], _K[32]);
-    round(h, a, b, c, d, e, f, g, w[1], _K[33]);
-    round(g, h, a, b, c, d, e, f, w[2], _K[34]);
-    round(f, g, h, a, b, c, d, e, w[3], _K[35]);
-    round(e, f, g, h, a, b, c, d, w[4], _K[36]);
-    round(d, e, f, g, h, a, b, c, w[5], _K[37]);
-    round(c, d, e, f, g, h, a, b, w[6], _K[38]);
-    round(b, c, d, e, f, g, h, a, w[7], _K[39]);
-    round(a, b, c, d, e, f, g, h, w[8], _K[40]);
-    round(h, a, b, c, d, e, f, g, w[9], _K[41]);
-    round(g, h, a, b, c, d, e, f, w[10], _K[42]);
-    round(f, g, h, a, b, c, d, e, w[11], _K[43]);
-    round(e, f, g, h, a, b, c, d, w[12], _K[44]);
-    round(d, e, f, g, h, a, b, c, w[13], _K[45]);
-    round(c, d, e, f, g, h, a, b, w[14], _K[46]);
-    round(b, c, d, e, f, g, h, a, w[15], _K[47]);
+    roundSha(a, b, c, d, e, f, g, h, w[0], _K[32]);
+    roundSha(h, a, b, c, d, e, f, g, w[1], _K[33]);
+    roundSha(g, h, a, b, c, d, e, f, w[2], _K[34]);
+    roundSha(f, g, h, a, b, c, d, e, w[3], _K[35]);
+    roundSha(e, f, g, h, a, b, c, d, w[4], _K[36]);
+    roundSha(d, e, f, g, h, a, b, c, w[5], _K[37]);
+    roundSha(c, d, e, f, g, h, a, b, w[6], _K[38]);
+    roundSha(b, c, d, e, f, g, h, a, w[7], _K[39]);
+    roundSha(a, b, c, d, e, f, g, h, w[8], _K[40]);
+    roundSha(h, a, b, c, d, e, f, g, w[9], _K[41]);
+    roundSha(g, h, a, b, c, d, e, f, w[10], _K[42]);
+    roundSha(f, g, h, a, b, c, d, e, w[11], _K[43]);
+    roundSha(e, f, g, h, a, b, c, d, w[12], _K[44]);
+    roundSha(d, e, f, g, h, a, b, c, w[13], _K[45]);
+    roundSha(c, d, e, f, g, h, a, b, w[14], _K[46]);
+    roundSha(b, c, d, e, f, g, h, a, w[15], _K[47]);
 
 
     w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
@@ -1720,139 +1495,150 @@ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, un
     w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
     w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
 
-    round(a, b, c, d, e, f, g, h, w[0], _K[48]);
-    round(h, a, b, c, d, e, f, g, w[1], _K[49]);
-    round(g, h, a, b, c, d, e, f, w[2], _K[50]);
-    round(f, g, h, a, b, c, d, e, w[3], _K[51]);
-    round(e, f, g, h, a, b, c, d, w[4], _K[52]);
-    round(d, e, f, g, h, a, b, c, w[5], _K[53]);
-    round(c, d, e, f, g, h, a, b, w[6], _K[54]);
-    round(b, c, d, e, f, g, h, a, w[7], _K[55]);
-    round(a, b, c, d, e, f, g, h, w[8], _K[56]);
-    round(h, a, b, c, d, e, f, g, w[9], _K[57]);
-    round(g, h, a, b, c, d, e, f, w[10], _K[58]);
-    round(f, g, h, a, b, c, d, e, w[11], _K[59]);
-    round(e, f, g, h, a, b, c, d, w[12], _K[60]);
-    round(d, e, f, g, h, a, b, c, w[13], _K[61]);
-    round(c, d, e, f, g, h, a, b, w[14], _K[62]);
-    round(b, c, d, e, f, g, h, a, w[15], _K[63]);
-
-    a += _IV[0];
-    b += _IV[1];
-    c += _IV[2];
-    d += _IV[3];
-    e += _IV[4];
-    f += _IV[5];
-    g += _IV[6];
-    h += _IV[7];
-
-    digest[0] = a;
-    digest[1] = b;
-    digest[2] = c;
-    digest[3] = d;
-    digest[4] = e;
-    digest[5] = f;
-    digest[6] = g;
-    digest[7] = h;
+    roundSha(a, b, c, d, e, f, g, h, w[0], _K[48]);
+    roundSha(h, a, b, c, d, e, f, g, w[1], _K[49]);
+    roundSha(g, h, a, b, c, d, e, f, w[2], _K[50]);
+    roundSha(f, g, h, a, b, c, d, e, w[3], _K[51]);
+    roundSha(e, f, g, h, a, b, c, d, w[4], _K[52]);
+    roundSha(d, e, f, g, h, a, b, c, w[5], _K[53]);
+    roundSha(c, d, e, f, g, h, a, b, w[6], _K[54]);
+    roundSha(b, c, d, e, f, g, h, a, w[7], _K[55]);
+    roundSha(a, b, c, d, e, f, g, h, w[8], _K[56]);
+    roundSha(h, a, b, c, d, e, f, g, w[9], _K[57]);
+    roundSha(g, h, a, b, c, d, e, f, w[10], _K[58]);
+    roundSha(f, g, h, a, b, c, d, e, w[11], _K[59]);
+    roundSha(e, f, g, h, a, b, c, d, w[12], _K[60]);
+    roundSha(d, e, f, g, h, a, b, c, w[13], _K[61]);
+    roundSha(c, d, e, f, g, h, a, b, w[14], _K[62]);
+    roundSha(b, c, d, e, f, g, h, a, w[15], _K[63]);
+
+    digest[0] = a + _IV[0];
+    digest[1] = b + _IV[1];
+    digest[2] = c + _IV[2];
+    digest[3] = d + _IV[3];
+    digest[4] = e + _IV[4];
+    digest[5] = f + _IV[5];
+    digest[6] = g + _IV[6];
+    digest[7] = h + _IV[7];
 }
 #endif
-#define COMPRESSED 0
-#define UNCOMPRESSED 1
-#define BOTH 2
-
-unsigned int endian(unsigned int x)
-{
-    return (x << 24) | ((x << 8) & 0x00ff0000) | ((x >> 8) & 0x0000ff00) | (x >> 24);
-}
+#ifndef BITCOIN_CL
+#define BITCOIN_CL
 
-typedef struct {
-    int idx;
-    bool compressed;
-    unsigned int x[8];
-    unsigned int y[8];
-    unsigned int digest[5];
-}CLDeviceResult;
+#ifndef endian // Kept only if endian is not already defined. Reverses the four bytes of a 32-bit unsigned word; x is evaluated four times, so pass side-effect-free operands.
+#define endian(x) (((x) << 24) | (((x) << 8) & 0x00ff0000) | (((x) >> 8) & 0x0000ff00) | ((x) >> 24))
+#endif
 
-bool isInList(unsigned int hash[5], __global unsigned int *targetList, size_t numTargets)
+void hashPublicKeyCompressed(const uint256_t x, const unsigned int yParity, unsigned int digest[5]) // Fills digest with ripemd160sha256NoFinal() of the byte-swapped sha256PublicKeyCompressed(x.v, yParity) words.
 {
-    bool found = false;
+    __private unsigned int hash[8]; // eight 32-bit words written by sha256PublicKeyCompressed
 
-    for(size_t i = 0; i < numTargets; i++) {
-        int equal = 0;
-
-        for(int j = 0; j < 5; j++) {
-            if(hash[j] == targetList[5 * i + j]) {
-                equal++;
-            }
-        }
-
-        if(equal == 5) {
-            found = true;
-        }
-    }
+    sha256PublicKeyCompressed(x.v, yParity, hash);
 
-    return found;
+    // Reverse the byte order of each of the eight words (endian()) before they are passed to ripemd160sha256NoFinal
+    hash[0] = endian(hash[0]);
+    hash[1] = endian(hash[1]);
+    hash[2] = endian(hash[2]);
+    hash[3] = endian(hash[3]);
+    hash[4] = endian(hash[4]);
+    hash[5] = endian(hash[5]);
+    hash[6] = endian(hash[6]);
+    hash[7] = endian(hash[7]);
+
+    ripemd160sha256NoFinal(hash, digest); // setResultFound later passes this digest through ripemd160FinalRound before storing it
 }
 
-bool isInBloomFilter(unsigned int hash[5], __global unsigned int *targetList, ulong mask)
+void hashPublicKey(const uint256_t x, const uint256_t y, unsigned int digest[5]) // Fills digest with ripemd160sha256NoFinal() of the byte-swapped sha256PublicKey(x.v, y.v) words.
 {
-    bool foundMatch = true;
+    __private unsigned int hash[8]; // eight 32-bit words written by sha256PublicKey
 
-    unsigned int h5 = 0;
-
-    for(int i = 0; i < 5; i++) {
-        h5 += hash[i];
-    }
-
-    uint64_t idx[5];
-
-    idx[0] = ((hash[0] << 6) | (h5 & 0x3f)) & mask;
-    idx[1] = ((hash[1] << 6) | ((h5 >> 6) & 0x3f)) & mask;
-    idx[2] = ((hash[2] << 6) | ((h5 >> 12) & 0x3f)) & mask;
-    idx[3] = ((hash[3] << 6) | ((h5 >> 18) & 0x3f)) & mask;
-    idx[4] = ((hash[4] << 6) | ((h5 >> 24) & 0x3f)) & mask;
-
-    for(int i = 0; i < 5; i++) {
-        unsigned int j = idx[i];
-        unsigned int f = targetList[j / 32];
-
-        if((f & (0x01 << (j % 32))) == 0) {
-            foundMatch = false;
-        }
-    }
+    sha256PublicKey(x.v, y.v, hash);
 
-    return foundMatch;
+    // Reverse the byte order of each of the eight words (endian()) before they are passed to ripemd160sha256NoFinal
+    hash[0] = endian(hash[0]);
+    hash[1] = endian(hash[1]);
+    hash[2] = endian(hash[2]);
+    hash[3] = endian(hash[3]);
+    hash[4] = endian(hash[4]);
+    hash[5] = endian(hash[5]);
+    hash[6] = endian(hash[6]);
+    hash[7] = endian(hash[7]);
+
+    ripemd160sha256NoFinal(hash, digest); // setResultFound later passes this digest through ripemd160FinalRound before storing it
 }
 
-bool checkHash(unsigned int hash[5], __global unsigned int *targetList, size_t numTargets, ulong mask)
+#endif
+#ifndef BLOOMFILTER_CL
+#define BLOOMFILTER_CL
+
+bool isInBloomFilter(const unsigned int hash[5], __global unsigned int *targetList, const ulong *mask) // Returns true only when all five probed bits are set in targetList; *mask is read, never written.
 {
-    if(numTargets > 16) {
-        return isInBloomFilter(hash, targetList, mask);
-    } else {
-        return isInList(hash, targetList, numTargets);
-    }
+    unsigned int h5 = hash[0] + hash[1] + hash[2] + hash[3] + hash[4]; // sum of the five words; its 6-bit slices supply the low bits of each probe index
+
+    return (false == // negation: the inner expression becomes true as soon as one probed bit is clear
+        (
+            (targetList[(((hash[0] << 6) | (h5 & 0x3f)) & *mask) / 32] & (0x01 << ((((hash[0] << 6) | (h5 & 0x3f)) & *mask) % 32))) == 0 || // probe k: index = ((hash[k] << 6) | slice k of h5) & *mask; index / 32 picks the word, index % 32 the bit
+            (targetList[(((hash[1] << 6) | ((h5 >> 6) & 0x3f)) & *mask) / 32] & (0x01 << ((((hash[1] << 6) | ((h5 >> 6) & 0x3f)) & *mask) % 32))) == 0 ||
+            (targetList[(((hash[2] << 6) | ((h5 >> 12) & 0x3f)) & *mask) / 32] & (0x01 << ((((hash[2] << 6) | ((h5 >> 12) & 0x3f)) & *mask) % 32))) == 0 ||
+            (targetList[(((hash[3] << 6) | ((h5 >> 18) & 0x3f)) & *mask) / 32] & (0x01 << ((((hash[3] << 6) | ((h5 >> 18) & 0x3f)) & *mask) % 32))) == 0 || 
+            (targetList[ (((hash[4] << 6) | ((h5 >> 24) & 0x3f)) & *mask) / 32] & (0x01 << ( (((hash[4] << 6) | ((h5 >> 24) & 0x3f)) & *mask) % 32))) == 0
+        )
+    );
 }
 
+#endif
+#define COMPRESSED 0
+#define UNCOMPRESSED 1
+#define BOTH 2
 
-void doRMD160FinalRound(const unsigned int hIn[5], unsigned int hOut[5])
-{
-    const unsigned int iv[5] = {
-        0x67452301,
-        0xefcdab89,
-        0x98badcfe,
-        0x10325476,
-        0xc3d2e1f0
-    };
+typedef struct { // Record that setResultFound appends to the results buffer
+    int idx; // idx argument of setResultFound (the step kernels pass their point index i)
+    bool compressed; // compressed argument of setResultFound (true on the hashPublicKeyCompressed path, false on the hashPublicKey path)
+    unsigned int x[8]; // copy of x.v[0..7]
+    unsigned int y[8]; // copy of y.v[0..7]
+    unsigned int digest[5]; // output of ripemd160FinalRound for the matching digest
+}CLDeviceResult; // NOTE(review): presumably mirrored by a host-side struct; confirm layout and the size of bool there
 
-    for(int i = 0; i < 5; i++) {
-        hOut[i] = endian(hIn[i] + iv[(i + 1) % 5]);
-    }
-}
+void setResultFound( // Builds one CLDeviceResult from the arguments and appends it to results
+    const int idx,
+    const bool compressed,
+    const uint256_t x,
+    const uint256_t y,
+    const unsigned int digest[5],
+    __global CLDeviceResult* results,
+    __global unsigned int* numResults
+) {
+    CLDeviceResult r;
+
+    r.idx = idx;
+    r.compressed = compressed;
 
+    r.x[0] = x.v[0]; // copy the eight words of x
+    r.x[1] = x.v[1];
+    r.x[2] = x.v[2];
+    r.x[3] = x.v[3];
+    r.x[4] = x.v[4];
+    r.x[5] = x.v[5];
+    r.x[6] = x.v[6];
+    r.x[7] = x.v[7];
+
+    r.y[0] = y.v[0]; // copy the eight words of y
+    r.y[1] = y.v[1];
+    r.y[2] = y.v[2];
+    r.y[3] = y.v[3];
+    r.y[4] = y.v[4];
+    r.y[5] = y.v[5];
+    r.y[6] = y.v[6];
+    r.y[7] = y.v[7];
+
+    ripemd160FinalRound(digest, r.digest); // r.digest receives the final-round output; digest is const here
+
+    results[atomic_add(numResults, 1)] = r; // atomic_add returns the previous count, which becomes this record's slot. NOTE(review): no capacity check is visible here
+}
 
-__kernel void multiplyStepKernel(
-    int totalPoints,
-    int step,
+__kernel void _initKeysKernel(
+    const unsigned int totalPoints,
+    const unsigned int step,
     __global uint256_t* privateKeys,
     __global uint256_t* chain,
     __global uint256_t* gxPtr,
@@ -1862,51 +1648,33 @@ __kernel void multiplyStepKernel(
 {
     uint256_t gx;
     uint256_t gy;
-    int gid = get_local_size(0) * get_group_id(0) + get_local_id(0);
+    int i = get_local_size(0) * get_group_id(0) + get_local_id(0);
     int dim = get_global_size(0);
 
     gx = gxPtr[step];
     gy = gyPtr[step];
 
-    // Multiply together all (_Gx - x) and then invert
     uint256_t inverse = { {0,0,0,0,0,0,0,1} };
 
     int batchIdx = 0;
-    int i = gid;
-    for(; i < totalPoints; i += dim) {
-
-        unsigned int p;
-        p = readWord256k(privateKeys, i, 7 - step / 32);
-
-        unsigned int bit = p & (1 << (step % 32));
-
-        uint256_t x = xPtr[i];
 
-        if(bit != 0) {
-            if(!isInfinity256k(x)) {
+    for(; i < totalPoints; i += dim) {
+        if(( (readWord256k(privateKeys, i, 7 - step / 32)) & (1 << (step % 32))) != 0) {
+            if(!isInfinity256k(xPtr[i].v)) {
                 beginBatchAddWithDouble256k(gx, gy, xPtr, chain, i, batchIdx, &inverse);
                 batchIdx++;
             }
         }
     }
 
-    //doBatchInverse(inverse);
-    inverse = doBatchInverse256k(inverse);
+    doBatchInverse256k(inverse.v);
 
+    uint256_t newX;
+    uint256_t newY;
     i -= dim;
     for(; i >= 0; i -= dim) {
-        uint256_t newX;
-        uint256_t newY;
-
-        unsigned int p;
-        p = readWord256k(privateKeys, i, 7 - step / 32);
-        unsigned int bit = p & (1 << (step % 32));
-
-        uint256_t x = xPtr[i];
-        bool infinity = isInfinity256k(x);
-
-        if(bit != 0) {
-            if(!infinity) {
+        if(((readWord256k(privateKeys, i, 7 - step / 32)) & (1 << (step % 32))) != 0) {
+            if(!isInfinity256k(xPtr[i].v)) {
                 batchIdx--;
                 completeBatchAddWithDouble256k(gx, gy, xPtr, yPtr, i, batchIdx, chain, &inverse, &newX, &newY);
             } else {
@@ -1920,75 +1688,19 @@ __kernel void multiplyStepKernel(
     }
 }
 
-
-void hashPublicKey(uint256_t x, uint256_t y, unsigned int* digestOut)
-{
-    unsigned int hash[8];
-
-    sha256PublicKey(x.v, y.v, hash);
-
-    // Swap to little-endian
-    for(int i = 0; i < 8; i++) {
-        hash[i] = endian(hash[i]);
-    }
-
-    ripemd160sha256NoFinal(hash, digestOut);
-}
-
-void hashPublicKeyCompressed(uint256_t x, unsigned int yParity, unsigned int* digestOut)
-{
-    unsigned int hash[8];
-
-    sha256PublicKeyCompressed(x.v, yParity, hash);
-
-    // Swap to little-endian
-    for(int i = 0; i < 8; i++) {
-        hash[i] = endian(hash[i]);
-    }
-
-    ripemd160sha256NoFinal(hash, digestOut);
-
-}
-
-void atomicListAdd(__global CLDeviceResult *results, __global unsigned int *numResults, CLDeviceResult *r)
-{
-    unsigned int count = atomic_add(numResults, 1);
-
-    results[count] = *r;
-}
-
-void setResultFound(int idx, bool compressed, uint256_t x, uint256_t y, unsigned int digest[5], __global CLDeviceResult* results, __global unsigned int* numResults)
-{
-    CLDeviceResult r;
-
-    r.idx = idx;
-    r.compressed = compressed;
-
-    for(int i = 0; i < 8; i++) {
-        r.x[i] = x.v[i];
-        r.y[i] = y.v[i];
-    }
-
-    doRMD160FinalRound(digest, r.digest);
-
-    atomicListAdd(results, numResults, &r);
-}
-
-void doIteration(
-    size_t totalPoints,
-    int compression,
+__kernel void _stepKernel(
+    const unsigned int totalPoints,
     __global uint256_t* chain,
     __global uint256_t* xPtr,
     __global uint256_t* yPtr,
     __global uint256_t* incXPtr,
     __global uint256_t* incYPtr,
-    __global unsigned int *targetList,
-    size_t numTargets,
-    ulong mask,
+    __global unsigned int* targetList,
+    const ulong mask,
     __global CLDeviceResult *results,
     __global unsigned int *numResults)
 {
-    int gid = get_local_size(0) * get_group_id(0) + get_local_id(0);
+    int i = get_local_size(0) * get_group_id(0) + get_local_id(0);
     int dim = get_global_size(0);
 
     uint256_t incX = *incXPtr;
@@ -1996,48 +1708,35 @@ void doIteration(
 
     // Multiply together all (_Gx - x) and then invert
     uint256_t inverse = { {0,0,0,0,0,0,0,1} };
-    int i = gid;
     int batchIdx = 0;
 
-    for(; i < totalPoints; i += dim) {
-        uint256_t x;
-
-        unsigned int digest[5];
-
-        x = xPtr[i];
-
-        if((compression == UNCOMPRESSED) || (compression == BOTH)) {
-            uint256_t y = yPtr[i];
-
-            hashPublicKey(x, y, digest);
+    unsigned int digest[5];
 
-            if(checkHash(digest, targetList, numTargets, mask)) {
-                setResultFound(i, false, x, y, digest, results, numResults);
-            }
+    for(; i < totalPoints; i += dim) {
+       
+#if defined(COMPRESSION_UNCOMPRESSED) || defined(COMPRESSION_BOTH)
+        hashPublicKey(xPtr[i], yPtr[i], digest);
+        if(isInBloomFilter(digest, targetList, &mask)) {
+            setResultFound(i, false, xPtr[i], yPtr[i], digest, results, numResults);
         }
-
-        if((compression == COMPRESSED) || (compression == BOTH)) {
-
-            hashPublicKeyCompressed(x, readLSW256k(yPtr, i), digest);
-
-            if(checkHash(digest, targetList, numTargets, mask)) {
-                uint256_t y = yPtr[i];
-                setResultFound(i, true, x, y, digest, results, numResults);
-            }
+#endif
+#if defined(COMPRESSION_COMPRESSED) || defined(COMPRESSION_BOTH)
+        hashPublicKeyCompressed(xPtr[i], yPtr[i].v[7], digest);
+        if(isInBloomFilter(digest, targetList, &mask)) {
+            setResultFound(i, true, xPtr[i], yPtr[i], digest, results, numResults);
         }
-
-        beginBatchAdd256k(incX, x, chain, i, batchIdx, &inverse);
+#endif
+        beginBatchAdd256k(incX, xPtr[i], chain, i, batchIdx, &inverse);
         batchIdx++;
     }
 
-    inverse = doBatchInverse256k(inverse);
+    doBatchInverse256k(inverse.v);
 
     i -= dim;
-
+    uint256_t newX;
+    uint256_t newY;
     for(;  i >= 0; i -= dim) {
 
-        uint256_t newX;
-        uint256_t newY;
         batchIdx--;
         completeBatchAdd256k(incX, incY, xPtr, yPtr, i, batchIdx, chain, &inverse, &newX, &newY);
 
@@ -2046,22 +1745,19 @@ void doIteration(
     }
 }
 
-
-void doIterationWithDouble(
-    size_t totalPoints,
-    int compression,
+__kernel void _stepKernelWithDouble(
+    const unsigned int totalPoints,
     __global uint256_t* chain,
     __global uint256_t* xPtr,
     __global uint256_t* yPtr,
     __global uint256_t* incXPtr,
     __global uint256_t* incYPtr,
     __global unsigned int* targetList,
-    size_t numTargets,
-    ulong mask,
+    const ulong mask,
     __global CLDeviceResult *results,
     __global unsigned int *numResults)
 {
-    int gid = get_local_size(0) * get_group_id(0) + get_local_id(0);
+    int i = get_local_size(0) * get_group_id(0) + get_local_id(0);
     int dim = get_global_size(0);
 
     uint256_t incX = *incXPtr;
@@ -2070,48 +1766,35 @@ void doIterationWithDouble(
     // Multiply together all (_Gx - x) and then invert
     uint256_t inverse = { {0,0,0,0,0,0,0,1} };
 
-    int i = gid;
     int batchIdx = 0;
-    for(; i < totalPoints; i += dim) {
-        uint256_t x;
-
-        unsigned int digest[5];
-
-        x = xPtr[i];
+    unsigned int digest[5];
 
-        // uncompressed
-        if((compression == UNCOMPRESSED) || (compression == BOTH)) {
-            uint256_t y = yPtr[i];
-            hashPublicKey(x, y, digest);
+    for(; i < totalPoints; i += dim) {
 
-            if(checkHash(digest, targetList, numTargets, mask)) {
-                setResultFound(i, false, x, y, digest, results, numResults);
-            }
+#if defined(COMPRESSION_UNCOMPRESSED) || defined(COMPRESSION_BOTH)
+        hashPublicKey(xPtr[i], yPtr[i], digest);
+        if(isInBloomFilter(digest, targetList, &mask)) {
+            setResultFound(i, false, xPtr[i], yPtr[i], digest, results, numResults);
         }
-
-        // compressed
-        if((compression == COMPRESSED) || (compression == BOTH)) {
-
-            hashPublicKeyCompressed(x, readLSW256k(yPtr, i), digest);
-
-            if(checkHash(digest, targetList, numTargets, mask)) {
-
-                uint256_t y = yPtr[i];
-                setResultFound(i, true, x, y, digest, results, numResults);
-            }
+#endif
+#if defined(COMPRESSION_COMPRESSED) || defined(COMPRESSION_BOTH)
+        hashPublicKeyCompressed(xPtr[i], yPtr[i].v[7], digest);
+        if(isInBloomFilter(digest, targetList, &mask)) {
+            setResultFound(i, true, xPtr[i], yPtr[i], digest, results, numResults);
         }
+#endif
 
         beginBatchAddWithDouble256k(incX, incY, xPtr, chain, i, batchIdx, &inverse);
         batchIdx++;
     }
 
-    inverse = doBatchInverse256k(inverse);
+    doBatchInverse256k(inverse.v);
 
     i -= dim;
 
+    uint256_t newX;
+    uint256_t newY;
     for(; i >= 0; i -= dim) {
-        uint256_t newX;
-        uint256_t newY;
         batchIdx--;
         completeBatchAddWithDouble256k(incX, incY, xPtr, yPtr, i, batchIdx, chain, &inverse, &newX, &newY);
 
@@ -2119,40 +1802,3 @@ void doIterationWithDouble(
         yPtr[i] = newY;
     }
 }
-
-/**
-* Performs a single iteration
-*/
-__kernel void keyFinderKernel(
-    unsigned int totalPoints,
-    int compression,
-    __global uint256_t* chain,
-    __global uint256_t* xPtr,
-    __global uint256_t* yPtr,
-    __global uint256_t* incXPtr,
-    __global uint256_t* incYPtr,
-    __global unsigned int* targetList,
-    ulong numTargets,
-    ulong mask,
-    __global CLDeviceResult *results,
-    __global unsigned int *numResults)
-{
-    doIteration(totalPoints, compression, chain, xPtr, yPtr, incXPtr, incYPtr, targetList, numTargets, mask, results, numResults);
-}
-
-__kernel void keyFinderKernelWithDouble(
-    unsigned int totalPoints,
-    int compression,
-    __global uint256_t* chain,
-    __global uint256_t* xPtr,
-    __global uint256_t* yPtr,
-    __global uint256_t* incXPtr,
-    __global uint256_t* incYPtr,
-    __global unsigned int* targetList,
-    ulong numTargets,
-    ulong mask,
-    __global CLDeviceResult *results,
-    __global unsigned int *numResults)
-{
-    doIterationWithDouble(totalPoints, compression, chain, xPtr, yPtr, incXPtr, incYPtr, targetList, numTargets, mask, results, numResults);
-}
diff --git a/CLKeySearchDevice/bloomfilter.cl b/CLKeySearchDevice/bloomfilter.cl
new file mode 100644
index 0000000..3e6265f
--- /dev/null
+++ b/CLKeySearchDevice/bloomfilter.cl
@@ -0,0 +1,45 @@
+#ifndef BLOOMFILTER_CL
+#define BLOOMFILTER_CL
+
+// Tests a 5-word digest against the bit array in targetList.
+//
+// One bit index is derived per digest word: (word << 6) OR-ed with a 6-bit
+// slice of the sum of all five words, then AND-ed with *mask. index / 32
+// selects the targetList word and index % 32 the bit inside it.
+//
+// Returns true only when all five bits are set. The words are checked in
+// order and the first clear bit ends the test.
+bool isInBloomFilter(const unsigned int hash[5], __global unsigned int *targetList, const ulong *mask)
+{
+    const unsigned int wordSum = hash[0] + hash[1] + hash[2] + hash[3] + hash[4];
+    ulong bit;
+
+    bit = ((hash[0] << 6) | (wordSum & 0x3f)) & *mask;
+    if((targetList[bit / 32] & (0x01 << (bit % 32))) == 0) {
+        return false;
+    }
+
+    bit = ((hash[1] << 6) | ((wordSum >> 6) & 0x3f)) & *mask;
+    if((targetList[bit / 32] & (0x01 << (bit % 32))) == 0) {
+        return false;
+    }
+
+    bit = ((hash[2] << 6) | ((wordSum >> 12) & 0x3f)) & *mask;
+    if((targetList[bit / 32] & (0x01 << (bit % 32))) == 0) {
+        return false;
+    }
+
+    bit = ((hash[3] << 6) | ((wordSum >> 18) & 0x3f)) & *mask;
+    if((targetList[bit / 32] & (0x01 << (bit % 32))) == 0) {
+        return false;
+    }
+
+    bit = ((hash[4] << 6) | ((wordSum >> 24) & 0x3f)) & *mask;
+    if((targetList[bit / 32] & (0x01 << (bit % 32))) == 0) {
+        return false;
+    }
+
+    return true;
+}
+
+#endif
diff --git a/CLKeySearchDevice/keysearch.cl b/CLKeySearchDevice/keysearch.cl
index 5da94c0..2728d3a 100644
--- a/CLKeySearchDevice/keysearch.cl
+++ b/CLKeySearchDevice/keysearch.cl
@@ -2,11 +2,6 @@
 #define UNCOMPRESSED 1
 #define BOTH 2
 
-unsigned int endian(unsigned int x)
-{
-    return (x << 24) | ((x << 8) & 0x00ff0000) | ((x >> 8) & 0x0000ff00) | (x >> 24);
-}
-
 typedef struct {
     int idx;
     bool compressed;
@@ -15,86 +10,46 @@ typedef struct {
     unsigned int digest[5];
 }CLDeviceResult;
 
-bool isInList(unsigned int hash[5], __global unsigned int *targetList, size_t numTargets)
-{
-    bool found = false;
-
-    for(size_t i = 0; i < numTargets; i++) {
-        int equal = 0;
-
-        for(int j = 0; j < 5; j++) {
-            if(hash[j] == targetList[5 * i + j]) {
-                equal++;
-            }
-        }
-
-        if(equal == 5) {
-            found = true;
-        }
-    }
-
-    return found;
-}
-
-bool isInBloomFilter(unsigned int hash[5], __global unsigned int *targetList, ulong mask)
-{
-    bool foundMatch = true;
-
-    unsigned int h5 = 0;
-
-    for(int i = 0; i < 5; i++) {
-        h5 += hash[i];
-    }
-
-    uint64_t idx[5];
-
-    idx[0] = ((hash[0] << 6) | (h5 & 0x3f)) & mask;
-    idx[1] = ((hash[1] << 6) | ((h5 >> 6) & 0x3f)) & mask;
-    idx[2] = ((hash[2] << 6) | ((h5 >> 12) & 0x3f)) & mask;
-    idx[3] = ((hash[3] << 6) | ((h5 >> 18) & 0x3f)) & mask;
-    idx[4] = ((hash[4] << 6) | ((h5 >> 24) & 0x3f)) & mask;
-
-    for(int i = 0; i < 5; i++) {
-        unsigned int j = idx[i];
-        unsigned int f = targetList[j / 32];
-
-        if((f & (0x01 << (j % 32))) == 0) {
-            foundMatch = false;
-        }
-    }
-
-    return foundMatch;
-}
-
-bool checkHash(unsigned int hash[5], __global unsigned int *targetList, size_t numTargets, ulong mask)
-{
-    if(numTargets > 16) {
-        return isInBloomFilter(hash, targetList, mask);
-    } else {
-        return isInList(hash, targetList, numTargets);
-    }
-}
-
+void setResultFound( // Builds one CLDeviceResult from the arguments and appends it to results
+    const int idx,
+    const bool compressed,
+    const uint256_t x,
+    const uint256_t y,
+    const unsigned int digest[5],
+    __global CLDeviceResult* results,
+    __global unsigned int* numResults
+) {
+    CLDeviceResult r;
 
-void doRMD160FinalRound(const unsigned int hIn[5], unsigned int hOut[5])
-{
-    const unsigned int iv[5] = {
-        0x67452301,
-        0xefcdab89,
-        0x98badcfe,
-        0x10325476,
-        0xc3d2e1f0
-    };
+    r.idx = idx;
+    r.compressed = compressed;
 
-    for(int i = 0; i < 5; i++) {
-        hOut[i] = endian(hIn[i] + iv[(i + 1) % 5]);
-    }
+    r.x[0] = x.v[0]; // copy the eight words of x
+    r.x[1] = x.v[1];
+    r.x[2] = x.v[2];
+    r.x[3] = x.v[3];
+    r.x[4] = x.v[4];
+    r.x[5] = x.v[5];
+    r.x[6] = x.v[6];
+    r.x[7] = x.v[7];
+
+    r.y[0] = y.v[0]; // copy the eight words of y
+    r.y[1] = y.v[1];
+    r.y[2] = y.v[2];
+    r.y[3] = y.v[3];
+    r.y[4] = y.v[4];
+    r.y[5] = y.v[5];
+    r.y[6] = y.v[6];
+    r.y[7] = y.v[7];
+
+    ripemd160FinalRound(digest, r.digest); // r.digest receives the final-round output; digest is const here
+
+    results[atomic_add(numResults, 1)] = r; // atomic_add returns the previous count, which becomes this record's slot. NOTE(review): no capacity check is visible here
 }
 
-
-__kernel void multiplyStepKernel(
-    int totalPoints,
-    int step,
+__kernel void _initKeysKernel(
+    const unsigned int totalPoints,
+    const unsigned int step,
     __global uint256_t* privateKeys,
     __global uint256_t* chain,
     __global uint256_t* gxPtr,
@@ -104,51 +59,33 @@ __kernel void multiplyStepKernel(
 {
     uint256_t gx;
     uint256_t gy;
-    int gid = get_local_size(0) * get_group_id(0) + get_local_id(0);
+    int i = get_local_size(0) * get_group_id(0) + get_local_id(0);
     int dim = get_global_size(0);
 
     gx = gxPtr[step];
     gy = gyPtr[step];
 
-    // Multiply together all (_Gx - x) and then invert
     uint256_t inverse = { {0,0,0,0,0,0,0,1} };
 
     int batchIdx = 0;
-    int i = gid;
-    for(; i < totalPoints; i += dim) {
-
-        unsigned int p;
-        p = readWord256k(privateKeys, i, 7 - step / 32);
-
-        unsigned int bit = p & (1 << (step % 32));
 
-        uint256_t x = xPtr[i];
-
-        if(bit != 0) {
-            if(!isInfinity256k(x)) {
+    for(; i < totalPoints; i += dim) {
+        if(( (readWord256k(privateKeys, i, 7 - step / 32)) & (1 << (step % 32))) != 0) {
+            if(!isInfinity256k(xPtr[i].v)) {
                 beginBatchAddWithDouble256k(gx, gy, xPtr, chain, i, batchIdx, &inverse);
                 batchIdx++;
             }
         }
     }
 
-    //doBatchInverse(inverse);
-    inverse = doBatchInverse256k(inverse);
+    doBatchInverse256k(inverse.v);
 
+    uint256_t newX;
+    uint256_t newY;
     i -= dim;
     for(; i >= 0; i -= dim) {
-        uint256_t newX;
-        uint256_t newY;
-
-        unsigned int p;
-        p = readWord256k(privateKeys, i, 7 - step / 32);
-        unsigned int bit = p & (1 << (step % 32));
-
-        uint256_t x = xPtr[i];
-        bool infinity = isInfinity256k(x);
-
-        if(bit != 0) {
-            if(!infinity) {
+        if(((readWord256k(privateKeys, i, 7 - step / 32)) & (1 << (step % 32))) != 0) {
+            if(!isInfinity256k(xPtr[i].v)) {
                 batchIdx--;
                 completeBatchAddWithDouble256k(gx, gy, xPtr, yPtr, i, batchIdx, chain, &inverse, &newX, &newY);
             } else {
@@ -162,75 +99,19 @@ __kernel void multiplyStepKernel(
     }
 }
 
-
-void hashPublicKey(uint256_t x, uint256_t y, unsigned int* digestOut)
-{
-    unsigned int hash[8];
-
-    sha256PublicKey(x.v, y.v, hash);
-
-    // Swap to little-endian
-    for(int i = 0; i < 8; i++) {
-        hash[i] = endian(hash[i]);
-    }
-
-    ripemd160sha256NoFinal(hash, digestOut);
-}
-
-void hashPublicKeyCompressed(uint256_t x, unsigned int yParity, unsigned int* digestOut)
-{
-    unsigned int hash[8];
-
-    sha256PublicKeyCompressed(x.v, yParity, hash);
-
-    // Swap to little-endian
-    for(int i = 0; i < 8; i++) {
-        hash[i] = endian(hash[i]);
-    }
-
-    ripemd160sha256NoFinal(hash, digestOut);
-
-}
-
-void atomicListAdd(__global CLDeviceResult *results, __global unsigned int *numResults, CLDeviceResult *r)
-{
-    unsigned int count = atomic_add(numResults, 1);
-
-    results[count] = *r;
-}
-
-void setResultFound(int idx, bool compressed, uint256_t x, uint256_t y, unsigned int digest[5], __global CLDeviceResult* results, __global unsigned int* numResults)
-{
-    CLDeviceResult r;
-
-    r.idx = idx;
-    r.compressed = compressed;
-
-    for(int i = 0; i < 8; i++) {
-        r.x[i] = x.v[i];
-        r.y[i] = y.v[i];
-    }
-
-    doRMD160FinalRound(digest, r.digest);
-
-    atomicListAdd(results, numResults, &r);
-}
-
-void doIteration(
-    size_t totalPoints,
-    int compression,
+__kernel void _stepKernel(
+    const unsigned int totalPoints,
     __global uint256_t* chain,
     __global uint256_t* xPtr,
     __global uint256_t* yPtr,
     __global uint256_t* incXPtr,
     __global uint256_t* incYPtr,
-    __global unsigned int *targetList,
-    size_t numTargets,
-    ulong mask,
+    __global unsigned int* targetList,
+    const ulong mask,
     __global CLDeviceResult *results,
     __global unsigned int *numResults)
 {
-    int gid = get_local_size(0) * get_group_id(0) + get_local_id(0);
+    int i = get_local_size(0) * get_group_id(0) + get_local_id(0);
     int dim = get_global_size(0);
 
     uint256_t incX = *incXPtr;
@@ -238,48 +119,35 @@ void doIteration(
 
     // Multiply together all (_Gx - x) and then invert
     uint256_t inverse = { {0,0,0,0,0,0,0,1} };
-    int i = gid;
     int batchIdx = 0;
 
-    for(; i < totalPoints; i += dim) {
-        uint256_t x;
-
-        unsigned int digest[5];
-
-        x = xPtr[i];
-
-        if((compression == UNCOMPRESSED) || (compression == BOTH)) {
-            uint256_t y = yPtr[i];
-
-            hashPublicKey(x, y, digest);
+    unsigned int digest[5];
 
-            if(checkHash(digest, targetList, numTargets, mask)) {
-                setResultFound(i, false, x, y, digest, results, numResults);
-            }
+    for(; i < totalPoints; i += dim) {
+       
+#if defined(COMPRESSION_UNCOMPRESSED) || defined(COMPRESSION_BOTH)
+        hashPublicKey(xPtr[i], yPtr[i], digest);
+        if(isInBloomFilter(digest, targetList, &mask)) {
+            setResultFound(i, false, xPtr[i], yPtr[i], digest, results, numResults);
         }
-
-        if((compression == COMPRESSED) || (compression == BOTH)) {
-
-            hashPublicKeyCompressed(x, readLSW256k(yPtr, i), digest);
-
-            if(checkHash(digest, targetList, numTargets, mask)) {
-                uint256_t y = yPtr[i];
-                setResultFound(i, true, x, y, digest, results, numResults);
-            }
+#endif
+#if defined(COMPRESSION_COMPRESSED) || defined(COMPRESSION_BOTH)
+        hashPublicKeyCompressed(xPtr[i], yPtr[i].v[7], digest);
+        if(isInBloomFilter(digest, targetList, &mask)) {
+            setResultFound(i, true, xPtr[i], yPtr[i], digest, results, numResults);
         }
-
-        beginBatchAdd256k(incX, x, chain, i, batchIdx, &inverse);
+#endif
+        beginBatchAdd256k(incX, xPtr[i], chain, i, batchIdx, &inverse);
         batchIdx++;
     }
 
-    inverse = doBatchInverse256k(inverse);
+    doBatchInverse256k(inverse.v);
 
     i -= dim;
-
+    uint256_t newX;
+    uint256_t newY;
     for(;  i >= 0; i -= dim) {
 
-        uint256_t newX;
-        uint256_t newY;
         batchIdx--;
         completeBatchAdd256k(incX, incY, xPtr, yPtr, i, batchIdx, chain, &inverse, &newX, &newY);
 
@@ -287,114 +155,3 @@ void doIteration(
         yPtr[i] = newY;
     }
 }
-
-
-void doIterationWithDouble(
-    size_t totalPoints,
-    int compression,
-    __global uint256_t* chain,
-    __global uint256_t* xPtr,
-    __global uint256_t* yPtr,
-    __global uint256_t* incXPtr,
-    __global uint256_t* incYPtr,
-    __global unsigned int* targetList,
-    size_t numTargets,
-    ulong mask,
-    __global CLDeviceResult *results,
-    __global unsigned int *numResults)
-{
-    int gid = get_local_size(0) * get_group_id(0) + get_local_id(0);
-    int dim = get_global_size(0);
-
-    uint256_t incX = *incXPtr;
-    uint256_t incY = *incYPtr;
-
-    // Multiply together all (_Gx - x) and then invert
-    uint256_t inverse = { {0,0,0,0,0,0,0,1} };
-
-    int i = gid;
-    int batchIdx = 0;
-    for(; i < totalPoints; i += dim) {
-        uint256_t x;
-
-        unsigned int digest[5];
-
-        x = xPtr[i];
-
-        // uncompressed
-        if((compression == UNCOMPRESSED) || (compression == BOTH)) {
-            uint256_t y = yPtr[i];
-            hashPublicKey(x, y, digest);
-
-            if(checkHash(digest, targetList, numTargets, mask)) {
-                setResultFound(i, false, x, y, digest, results, numResults);
-            }
-        }
-
-        // compressed
-        if((compression == COMPRESSED) || (compression == BOTH)) {
-
-            hashPublicKeyCompressed(x, readLSW256k(yPtr, i), digest);
-
-            if(checkHash(digest, targetList, numTargets, mask)) {
-
-                uint256_t y = yPtr[i];
-                setResultFound(i, true, x, y, digest, results, numResults);
-            }
-        }
-
-        beginBatchAddWithDouble256k(incX, incY, xPtr, chain, i, batchIdx, &inverse);
-        batchIdx++;
-    }
-
-    inverse = doBatchInverse256k(inverse);
-
-    i -= dim;
-
-    for(; i >= 0; i -= dim) {
-        uint256_t newX;
-        uint256_t newY;
-        batchIdx--;
-        completeBatchAddWithDouble256k(incX, incY, xPtr, yPtr, i, batchIdx, chain, &inverse, &newX, &newY);
-
-        xPtr[i] = newX;
-        yPtr[i] = newY;
-    }
-}
-
-/**
-* Performs a single iteration
-*/
-__kernel void keyFinderKernel(
-    unsigned int totalPoints,
-    int compression,
-    __global uint256_t* chain,
-    __global uint256_t* xPtr,
-    __global uint256_t* yPtr,
-    __global uint256_t* incXPtr,
-    __global uint256_t* incYPtr,
-    __global unsigned int* targetList,
-    ulong numTargets,
-    ulong mask,
-    __global CLDeviceResult *results,
-    __global unsigned int *numResults)
-{
-    doIteration(totalPoints, compression, chain, xPtr, yPtr, incXPtr, incYPtr, targetList, numTargets, mask, results, numResults);
-}
-
-__kernel void keyFinderKernelWithDouble(
-    unsigned int totalPoints,
-    int compression,
-    __global uint256_t* chain,
-    __global uint256_t* xPtr,
-    __global uint256_t* yPtr,
-    __global uint256_t* incXPtr,
-    __global uint256_t* incYPtr,
-    __global unsigned int* targetList,
-    ulong numTargets,
-    ulong mask,
-    __global CLDeviceResult *results,
-    __global unsigned int *numResults)
-{
-    doIterationWithDouble(totalPoints, compression, chain, xPtr, yPtr, incXPtr, incYPtr, targetList, numTargets, mask, results, numResults);
-}
diff --git a/CLUnitTests/CLUnitTests.vcxproj b/CLUnitTests/CLUnitTests.vcxproj
index 6a8e415..6056285 100644
--- a/CLUnitTests/CLUnitTests.vcxproj
+++ b/CLUnitTests/CLUnitTests.vcxproj
@@ -28,26 +28,26 @@
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
+    <PlatformToolset>ClangCl</PlatformToolset>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
+    <PlatformToolset>ClangCl</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v142</PlatformToolset>
+    <PlatformToolset>ClangCl</PlatformToolset>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v142</PlatformToolset>
+    <PlatformToolset>ClangCL</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
@@ -107,7 +107,7 @@ $(SolutionDir)\tools\embedcl.exe test.cl test.cpp _secp256k1_test_cl</Command>
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <ClCompile>
-      <WarningLevel>Level3</WarningLevel>
+      <WarningLevel>EnableAllWarnings</WarningLevel>
       <Optimization>Disabled</Optimization>
       <SDLCheck>true</SDLCheck>
       <ConformanceMode>true</ConformanceMode>
diff --git a/CLUnitTests/main.cpp b/CLUnitTests/main.cpp
index f221f2f..4320254 100644
--- a/CLUnitTests/main.cpp
+++ b/CLUnitTests/main.cpp
@@ -67,7 +67,7 @@ int main(int argc, char **argv)
     try {
         devices = cl::getDevices();
     }catch(cl::CLException ex) {
-        std::cout << "Error: " << ex.msg << std::endl;
+        std::cout << "Error " << ex.msg << ": " << ex.description << std::endl;
         return 1;
     }
 
@@ -86,7 +86,7 @@ int main(int argc, char **argv)
             numErrors += runTest(devices[i].id);
         }
         catch(cl::CLException ex) {
-            std::cout << "Error " << ex.msg << std::endl;
+            std::cout << "Error " << ex.msg << ": " << ex.description << std::endl;
         }
     }
 
diff --git a/CLUnitTests/secp256k1test.cl b/CLUnitTests/secp256k1test.cl
index d3119a1..de7e0ba 100644
--- a/CLUnitTests/secp256k1test.cl
+++ b/CLUnitTests/secp256k1test.cl
@@ -8,6 +8,17 @@ typedef struct {
 }CLErrorInfo;
 
 
+bool equal(const unsigned int a[8], const unsigned int b[8])
+{
+    for(int i = 0; i < 8; i++) {
+        if(a[i] != b[i]) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
 bool addTest()
 {
     unsigned int x[8] = { 0xa4aea9b8, 0x6fe248f5, 0x1fc74965, 0xe9493264, 0x4e2dff0c, 0x009f7c9c, 0x832fa59b, 0x3361f837 };
diff --git a/CmdParse/CmdParse.cpp b/CmdParse/CmdParse.cpp
index adc1c6c..7855bd2 100644
--- a/CmdParse/CmdParse.cpp
+++ b/CmdParse/CmdParse.cpp
@@ -80,4 +80,4 @@ std::vector<OptArg> CmdParse::getArgs()
 std::vector<std::string> CmdParse::getOperands()
 {
 	return _operands;
-}
\ No newline at end of file
+}
diff --git a/CmdParse/CmdParse.h b/CmdParse/CmdParse.h
index 3135fbf..ca85e03 100644
--- a/CmdParse/CmdParse.h
+++ b/CmdParse/CmdParse.h
@@ -1,5 +1,5 @@
-#ifndef _CMD_PARSE
-#define _CMD_PARSE
+#ifndef CMD_PARSE_H
+#define CMD_PARSE_H
 
 #include <string>
 #include <vector>
@@ -53,4 +53,4 @@ class CmdParse {
 	std::vector<std::string> getOperands();
 };
 
-#endif
\ No newline at end of file
+#endif
diff --git a/CmdParse/CmdParse.vcxproj b/CmdParse/CmdParse.vcxproj
index e42fc4c..116946a 100644
--- a/CmdParse/CmdParse.vcxproj
+++ b/CmdParse/CmdParse.vcxproj
@@ -5,6 +5,14 @@
       <Configuration>Debug</Configuration>
       <Platform>Win32</Platform>
     </ProjectConfiguration>
+    <ProjectConfiguration Include="Performance Release|Win32">
+      <Configuration>Performance Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Performance Release|x64">
+      <Configuration>Performance Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
     <ProjectConfiguration Include="Release|Win32">
       <Configuration>Release</Configuration>
       <Platform>Win32</Platform>
@@ -34,26 +42,40 @@
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
+    <PlatformToolset>ClangCl</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>ClangCl</PlatformToolset>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
+    <PlatformToolset>ClangCl</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v142</PlatformToolset>
+    <PlatformToolset>ClangCl</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>ClangCl</PlatformToolset>
     <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PreferredToolArchitecture>x64</PreferredToolArchitecture>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v142</PlatformToolset>
+    <PlatformToolset>ClangCL</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
@@ -66,6 +88,10 @@
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
     <Import Project="..\BitCrack.props" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="..\BitCrack.props" />
+  </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
     <Import Project="..\BitCrack.props" />
@@ -74,6 +100,10 @@
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
     <Import Project="..\BitCrack.props" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="..\BitCrack.props" />
+  </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
     <Import Project="..\BitCrack.props" />
@@ -92,10 +122,22 @@
       <SubSystem>Windows</SubSystem>
     </Link>
   </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <ClCompile>
       <PrecompiledHeader>NotUsing</PrecompiledHeader>
-      <WarningLevel>Level3</WarningLevel>
+      <WarningLevel>EnableAllWarnings</WarningLevel>
       <Optimization>Disabled</Optimization>
       <PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
     </ClCompile>
@@ -103,6 +145,24 @@
       <SubSystem>Windows</SubSystem>
     </Link>
   </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|x64'">
+    <ClCompile>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <DebugInformationFormat>None</DebugInformationFormat>
+      <Optimization>MaxSpeed</Optimization>
+      <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <ClCompile>
       <WarningLevel>Level3</WarningLevel>
diff --git a/CryptoUtil/CryptoUtil.h b/CryptoUtil/CryptoUtil.h
index fdd2b5c..aab42b6 100644
--- a/CryptoUtil/CryptoUtil.h
+++ b/CryptoUtil/CryptoUtil.h
@@ -10,7 +10,7 @@ namespace crypto {
 
 	public:
 		Rng();
-		void get(unsigned char *buf, int len);
+		void get(unsigned char *buf, size_t len);
 	};
 
 
@@ -20,6 +20,6 @@ namespace crypto {
 	void sha256(unsigned int *msg, unsigned int *digest);
 
 	unsigned int checksum(const unsigned int *hash);
-};
+}
 
-#endif
\ No newline at end of file
+#endif
diff --git a/CryptoUtil/CryptoUtil.vcxproj b/CryptoUtil/CryptoUtil.vcxproj
index 8b0ed8b..53ab0fc 100644
--- a/CryptoUtil/CryptoUtil.vcxproj
+++ b/CryptoUtil/CryptoUtil.vcxproj
@@ -5,6 +5,14 @@
       <Configuration>Debug</Configuration>
       <Platform>Win32</Platform>
     </ProjectConfiguration>
+    <ProjectConfiguration Include="Performance Release|Win32">
+      <Configuration>Performance Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Performance Release|x64">
+      <Configuration>Performance Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
     <ProjectConfiguration Include="Release|Win32">
       <Configuration>Release</Configuration>
       <Platform>Win32</Platform>
@@ -31,32 +39,46 @@
     <ProjectGuid>{CA46856A-1D1E-4F6F-A69C-6707D540BF36}</ProjectGuid>
     <Keyword>Win32Proj</Keyword>
     <RootNamespace>CryptoUtil</RootNamespace>
-    <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
+    <WindowsTargetPlatformVersion>10.0.19041.0</WindowsTargetPlatformVersion>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
+    <PlatformToolset>ClangCl</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <PlatformToolset>ClangCl</PlatformToolset>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
+    <PlatformToolset>ClangCl</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
-    <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v142</PlatformToolset>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>ClangCl</PlatformToolset>
     <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>ClangCl</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PreferredToolArchitecture>x64</PreferredToolArchitecture>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v142</PlatformToolset>
+    <PlatformToolset>ClangCL</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
@@ -69,6 +91,10 @@
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
     <Import Project="..\BitCrack.props" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="..\BitCrack.props" />
+  </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
     <Import Project="..\BitCrack.props" />
@@ -77,6 +103,10 @@
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
     <Import Project="..\BitCrack.props" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="..\BitCrack.props" />
+  </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
     <Import Project="..\BitCrack.props" />
@@ -95,10 +125,22 @@
       <SubSystem>Windows</SubSystem>
     </Link>
   </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <ClCompile>
       <PrecompiledHeader>NotUsing</PrecompiledHeader>
-      <WarningLevel>Level3</WarningLevel>
+      <WarningLevel>EnableAllWarnings</WarningLevel>
       <Optimization>Disabled</Optimization>
       <PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
     </ClCompile>
@@ -106,6 +148,24 @@
       <SubSystem>Windows</SubSystem>
     </Link>
   </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|x64'">
+    <ClCompile>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>MaxSpeed</Optimization>
+      <PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <DebugInformationFormat>None</DebugInformationFormat>
+      <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <ClCompile>
       <WarningLevel>Level3</WarningLevel>
diff --git a/CryptoUtil/Rng.cpp b/CryptoUtil/Rng.cpp
index ada92b1..c07b4ce 100644
--- a/CryptoUtil/Rng.cpp
+++ b/CryptoUtil/Rng.cpp
@@ -46,7 +46,7 @@ void crypto::Rng::reseed()
 	secureRandom((unsigned char *)_state, 32);
 }
 
-void crypto::Rng::get(unsigned char *buf, int len)
+void crypto::Rng::get(unsigned char *buf, size_t len)
 {
 	int i = 0;
 	while(len > 0) {
@@ -70,4 +70,4 @@ void crypto::Rng::get(unsigned char *buf, int len)
 			len -= len;
 		}
 	}
-}
\ No newline at end of file
+}
diff --git a/CryptoUtil/hash.cpp b/CryptoUtil/hash.cpp
index 138a562..5d534aa 100644
--- a/CryptoUtil/hash.cpp
+++ b/CryptoUtil/hash.cpp
@@ -2,11 +2,6 @@
 #include<stdio.h>
 #include <string.h>
 
-static unsigned int endian(unsigned int x)
-{
-	return (x << 24) | ((x << 8) & 0x00ff0000) | ((x >> 8) & 0x0000ff00) | (x >> 24);
-}
-
 unsigned int crypto::checksum(const unsigned int *hash)
 {
 	unsigned int msg[16] = { 0 };
@@ -30,6 +25,8 @@ unsigned int crypto::checksum(const unsigned int *hash)
 
 	// Prepare to make a hash of the digest
 	memset(msg, 0, 16 * sizeof(unsigned int));
+
+	#pragma clang loop unroll(full)
 	for(int i = 0; i < 8; i++) {
 		msg[i] = digest[i];
 	}
diff --git a/CryptoUtil/ripemd160.cpp b/CryptoUtil/ripemd160.cpp
index f442f15..790a804 100644
--- a/CryptoUtil/ripemd160.cpp
+++ b/CryptoUtil/ripemd160.cpp
@@ -25,7 +25,7 @@ static unsigned int endian(unsigned int x)
 	return (x << 24) | ((x << 8) & 0x00ff0000) | ((x >> 8) & 0x0000ff00) | (x >> 24);
 }
 
-static unsigned int rotl(unsigned int x, int n)
+static unsigned int rotl(unsigned int x, unsigned int n)
 {
 	return (x << n) | (x >> (32 - n));
 }
@@ -325,4 +325,4 @@ void crypto::ripemd160(unsigned int *x, unsigned int *digest)
 	digest[2] = endian(_IV[3] + e1 + a2);
 	digest[3] = endian(_IV[4] + a1 + b2);
 	digest[4] = endian(_IV[0] + b1 + c2);
-}
\ No newline at end of file
+}
diff --git a/CryptoUtil/sha256.cpp b/CryptoUtil/sha256.cpp
index 1f8d853..6247272 100644
--- a/CryptoUtil/sha256.cpp
+++ b/CryptoUtil/sha256.cpp
@@ -50,6 +50,7 @@ static void round(unsigned int a, unsigned int b, unsigned int c, unsigned int &
 
 void crypto::sha256Init(unsigned int *digest)
 {
+	#pragma clang loop unroll(full)
 	for(int i = 0; i < 8; i++) {
 		digest[i] = _IV[i];
 	}
@@ -70,11 +71,14 @@ void crypto::sha256(unsigned int *msg, unsigned int *digest)
 	h = digest[7];
 
 	unsigned int w[80] = { 0 };
+	#pragma clang loop unroll(full)
 	for(int i = 0; i < 16; i++) {
 		w[i] = msg[i];
 	}
 
 	// Expand 16 words to 64 words
+
+	#pragma clang loop unroll(full)
 	for(int i = 16; i < 64; i++) {
 		unsigned int x = w[i - 15];
 		unsigned int y = w[i - 2];
@@ -84,6 +88,7 @@ void crypto::sha256(unsigned int *msg, unsigned int *digest)
 		w[i] = w[i - 16] + s0 + w[i - 7] + s1;
 	}
 
+	#pragma clang loop unroll(full)
 	for(int i = 0; i < 64; i += 8) {
 		round(a, b, c, d, e, f, g, h, w[i], _K[i]);
 		round(h, a, b, c, d, e, f, g, w[i + 1], _K[i + 1]);
@@ -103,4 +108,4 @@ void crypto::sha256(unsigned int *msg, unsigned int *digest)
 	digest[5] += f;
 	digest[6] += g;
 	digest[7] += h;
-}
\ No newline at end of file
+}
diff --git a/CudaKeySearchDevice/CudaAtomicList.cu b/CudaKeySearchDevice/CudaAtomicList.cu
deleted file mode 100644
index dcf1096..0000000
--- a/CudaKeySearchDevice/CudaAtomicList.cu
+++ /dev/null
@@ -1,116 +0,0 @@
-#include "CudaAtomicList.h"
-#include "CudaAtomicList.cuh"
-
-#include <stdio.h>
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-
-static __constant__ void *_LIST_BUF[1];
-static __constant__ unsigned int *_LIST_SIZE[1];
-
-
-__device__ void atomicListAdd(void *info, unsigned int size)
-{
-	unsigned int count = atomicAdd(_LIST_SIZE[0], 1);
-
-	unsigned char *ptr = (unsigned char *)(_LIST_BUF[0]) + count * size;
-
-	memcpy(ptr, info, size);
-}
-
-static cudaError_t setListPtr(void *ptr, unsigned int *numResults)
-{
-	cudaError_t err = cudaMemcpyToSymbol(_LIST_BUF, &ptr, sizeof(void *));
-
-	if(err) {
-		return err;
-	}
-
-	err = cudaMemcpyToSymbol(_LIST_SIZE, &numResults, sizeof(unsigned int *));
-
-	return err;
-}
-
-
-cudaError_t CudaAtomicList::init(unsigned int itemSize, unsigned int maxItems)
-{
-	_itemSize = itemSize;
-
-	// The number of results found in the most recent kernel run
-	_countHostPtr = NULL;
-	cudaError_t err = cudaHostAlloc(&_countHostPtr, sizeof(unsigned int), cudaHostAllocMapped);
-	if(err) {
-		goto end;
-	}
-
-	// Number of items in the list
-	_countDevPtr = NULL;
-	err = cudaHostGetDevicePointer(&_countDevPtr, _countHostPtr, 0);
-	if(err) {
-		goto end;
-	}
-	*_countHostPtr = 0;
-
-	// Storage for results data
-	_hostPtr = NULL;
-	err = cudaHostAlloc(&_hostPtr, itemSize * maxItems, cudaHostAllocMapped);
-	if(err) {
-		goto end;
-	}
-
-	// Storage for results data (device to host pointer)
-	_devPtr = NULL;
-	err = cudaHostGetDevicePointer(&_devPtr, _hostPtr, 0);
-
-	if(err) {
-		goto end;
-	}
-
-	err = setListPtr(_devPtr, _countDevPtr);
-
-end:
-	if(err) {
-		cudaFreeHost(_countHostPtr);
-
-		cudaFree(_countDevPtr);
-
-		cudaFreeHost(_hostPtr);
-
-		cudaFree(_devPtr);
-	}
-
-	return err;
-}
-
-unsigned int CudaAtomicList::size()
-{
-	return *_countHostPtr;
-}
-
-void CudaAtomicList::clear()
-{
-	*_countHostPtr = 0;
-}
-
-unsigned int CudaAtomicList::read(void *ptr, unsigned int count)
-{
-	if(count >= *_countHostPtr) {
-		count = *_countHostPtr;
-	}
-
-	memcpy(ptr, _hostPtr, count * _itemSize);
-
-	return count;
-}
-
-void CudaAtomicList::cleanup()
-{
-	cudaFreeHost(_countHostPtr);
-
-	cudaFree(_countDevPtr);
-
-	cudaFreeHost(_hostPtr);
-
-	cudaFree(_devPtr);
-}
\ No newline at end of file
diff --git a/CudaKeySearchDevice/CudaAtomicList.cuh b/CudaKeySearchDevice/CudaAtomicList.cuh
deleted file mode 100644
index 70dacb2..0000000
--- a/CudaKeySearchDevice/CudaAtomicList.cuh
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef _ATOMIC_LIST_CUH
-#define _ATOMIC_LIST_CUH
-
-#include <cuda_runtime.h>
-
-__device__ void atomicListAdd(void *info, unsigned int size);
-
-#endif
\ No newline at end of file
diff --git a/CudaKeySearchDevice/CudaAtomicList.h b/CudaKeySearchDevice/CudaAtomicList.h
deleted file mode 100644
index 8bd9eeb..0000000
--- a/CudaKeySearchDevice/CudaAtomicList.h
+++ /dev/null
@@ -1,54 +0,0 @@
-#ifndef _ATOMIC_LIST_HOST_H
-#define _ATOMIC_LIST_HOST_H
-
-#include <cuda_runtime.h>
-
-/**
- A list that multiple device threads can append items to. Items can be
- read and removed by the host
- */
-class CudaAtomicList {
-
-private:
-	void *_devPtr;
-
-	void *_hostPtr;
-
-	unsigned int *_countHostPtr;
-
-	unsigned int *_countDevPtr;
-
-	unsigned int _maxSize;
-
-	unsigned int _itemSize;
-
-public:
-
-	CudaAtomicList()
-	{
-		_devPtr = NULL;
-		_hostPtr = NULL;
-		_countHostPtr = NULL;
-		_countDevPtr = NULL;
-		_maxSize = 0;
-		_itemSize = 0;
-	}
-
-	~CudaAtomicList()
-	{
-		cleanup();
-	}
-
-	cudaError_t init(unsigned int itemSize, unsigned int maxItems);
-
-	unsigned int read(void *dest, unsigned int count);
-
-	unsigned int size();
-
-	void clear();
-
-    void cleanup();
-
-};
-
-#endif
\ No newline at end of file
diff --git a/CudaKeySearchDevice/CudaDeviceKeys.cu b/CudaKeySearchDevice/CudaDeviceKeys.cu
deleted file mode 100644
index d98dbaa..0000000
--- a/CudaKeySearchDevice/CudaDeviceKeys.cu
+++ /dev/null
@@ -1,397 +0,0 @@
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <device_launch_parameters.h>
-
-#include "CudaDeviceKeys.h"
-#include "CudaDeviceKeys.cuh"
-#include "secp256k1.cuh"
-
-
-__constant__ unsigned int *_xPtr[1];
-
-__constant__ unsigned int *_yPtr[1];
-
-
-__device__ unsigned int *ec::getXPtr()
-{
-	return _xPtr[0];
-}
-
-__device__ unsigned int *ec::getYPtr()
-{
-	return _yPtr[0];
-}
-
-__global__ void multiplyStepKernel(const unsigned int *privateKeys, int pointsPerThread, int step, unsigned int *chain, const unsigned int *gxPtr, const unsigned int *gyPtr);
-
-
-int CudaDeviceKeys::getIndex(int block, int thread, int idx)
-{
-	// Total number of threads
-	int totalThreads = _blocks * _threads;
-
-	int base = idx * totalThreads;
-
-	// Global ID of the current thread
-	int threadId = block * _threads + thread;
-
-	return base + threadId;
-}
-
-void CudaDeviceKeys::splatBigInt(unsigned int *dest, int block, int thread, int idx, const secp256k1::uint256 &i)
-{
-	unsigned int value[8] = { 0 };
-
-	i.exportWords(value, 8, secp256k1::uint256::BigEndian);
-
-	int totalThreads = _blocks * _threads;
-	int threadId = block * _threads + thread;
-
-	int base = idx * _blocks * _threads * 8;
-
-	int index = base + threadId;
-
-	for(int k = 0; k < 8; k++) {
-		dest[index] = value[k];
-		index += totalThreads;
-	}
-}
-
-secp256k1::uint256 CudaDeviceKeys::readBigInt(unsigned int *src, int block, int thread, int idx)
-{
-	unsigned int value[8] = { 0 };
-
-	int totalThreads = _blocks * _threads;
-	int threadId = block * _threads + thread;
-
-	int base = idx * _blocks * _threads * 8;
-
-	int index = base + threadId;
-
-	for(int k = 0; k < 8; k++) {
-		value[k] = src[index];
-		index += totalThreads;
-	}
-
-	secp256k1::uint256 v(value, secp256k1::uint256::BigEndian);
-
-	return v;
-}
-
-/**
-* Allocates device memory for storing the multiplication chain used in
-the batch inversion operation
-*/
-cudaError_t CudaDeviceKeys::allocateChainBuf(unsigned int count)
-{
-	cudaError_t err = cudaMalloc(&_devChain, count * sizeof(unsigned int) * 8);
-
-	if(err) {
-		return err;
-	}
-
-	return err;
-}
-
-cudaError_t CudaDeviceKeys::initializeBasePoints()
-{
-	// generate a table of points G, 2G, 4G, 8G...(2^255)G
-	std::vector<secp256k1::ecpoint> table;
-
-	table.push_back(secp256k1::G());
-	for(int i = 1; i < 256; i++) {
-
-		secp256k1::ecpoint p = doublePoint(table[i - 1]);
-		if(!pointExists(p)) {
-			throw std::string("Point does not exist!");
-		}
-		table.push_back(p);
-	}
-
-	unsigned int count = 256;
-
-	cudaError_t err = cudaMalloc(&_devBasePointX, sizeof(unsigned int) * count * 8);
-
-	if(err) {
-		return err;
-	}
-
-	err = cudaMalloc(&_devBasePointY, sizeof(unsigned int) * count * 8);
-	if(err) {
-		return err;
-	}
-
-	unsigned int *tmpX = new unsigned int[count * 8];
-	unsigned int *tmpY = new unsigned int[count * 8];
-
-	for(int i = 0; i < 256; i++) {
-		unsigned int bufX[8];
-		unsigned int bufY[8];
-		table[i].x.exportWords(bufX, 8, secp256k1::uint256::BigEndian);
-		table[i].y.exportWords(bufY, 8, secp256k1::uint256::BigEndian);
-
-		for(int j = 0; j < 8; j++) {
-			tmpX[i * 8 + j] = bufX[j];
-			tmpY[i * 8 + j] = bufY[j];
-		}
-	}
-
-	err = cudaMemcpy(_devBasePointX, tmpX, count * 8 * sizeof(unsigned int), cudaMemcpyHostToDevice);
-
-	delete[] tmpX;
-
-	if(err) {
-		delete[] tmpY;
-		return err;
-	}
-
-	err = cudaMemcpy(_devBasePointY, tmpY, count * 8 * sizeof(unsigned int), cudaMemcpyHostToDevice);
-
-	delete[] tmpY;
-
-	return err;
-}
-
-cudaError_t CudaDeviceKeys::initializePublicKeys(size_t count)
-{
-
-	// Allocate X array
-	cudaError_t err = cudaMalloc(&_devX, sizeof(unsigned int) * count * 8);
-	if(err) {
-		return err;
-	}
-
-	// Clear X array
-	err = cudaMemset(_devX, -1, sizeof(unsigned int) * count * 8);
-	if(err) {
-		return err;
-	}
-
-	// Allocate Y array
-	err = cudaMalloc(&_devY, sizeof(unsigned int) * count * 8);
-	if(err) {
-		return err;
-	}
-
-	// Clear Y array
-	err = cudaMemset(_devY, -1, sizeof(unsigned int) * count * 8);
-	if(err) {
-		return err;
-	}
-
-	err = cudaMemcpyToSymbol(_xPtr, &_devX, sizeof(unsigned int *));
-	if(err) {
-		return err;
-	}
-
-	err = cudaMemcpyToSymbol(_yPtr, &_devY, sizeof(unsigned int *));
-	
-	return err;
-}
-
-cudaError_t CudaDeviceKeys::init(int blocks, int threads, int pointsPerThread, const std::vector<secp256k1::uint256> &privateKeys)
-{
-	_blocks = blocks;
-	_threads = threads;
-	_pointsPerThread = pointsPerThread;
-
-	size_t count = privateKeys.size();
-
-	// Allocate space for public keys on device
-	cudaError_t err = initializePublicKeys(count);
-
-	if(err) {
-		return err;
-	}
-
-	err = initializeBasePoints();
-	if(err) {
-		return err;
-	}
-
-	// Allocate private keys on device
-	err = cudaMalloc(&_devPrivate, sizeof(unsigned int) * count * 8);
-	if(err) {
-		return err;
-	}
-
-
-	// Clear private keys
-	err = cudaMemset(_devPrivate, 0, sizeof(unsigned int) * count * 8);
-	if(err) {
-		return err;
-	}
-
-	err = allocateChainBuf(_threads * _blocks * _pointsPerThread);
-	if(err) {
-		return err;
-	}
-
-	// Copy private keys to system memory buffer
-	unsigned int *tmp = new unsigned int[count * 8];
-
-	for(int block = 0; block < _blocks; block++) {
-		for(int thread = 0; thread < _threads; thread++) {
-			for(int idx = 0; idx < _pointsPerThread; idx++) {
-
-				int index = getIndex(block, thread, idx);
-
-				splatBigInt(tmp, block, thread, idx, privateKeys[index]);
-			}
-		}
-	}
-
-	// Copy private keys to device memory
-	err = cudaMemcpy(_devPrivate, tmp, count * sizeof(unsigned int) * 8, cudaMemcpyHostToDevice);
-
-	delete[] tmp;
-
-	if(err) {
-		return err;
-	}
-
-	return cudaSuccess;
-}
-
-void CudaDeviceKeys::clearPublicKeys()
-{
-	cudaFree(_devX);
-	cudaFree(_devY);
-
-	_devX = NULL;
-	_devY = NULL;
-}
-
-void CudaDeviceKeys::clearPrivateKeys()
-{
-	cudaFree(_devBasePointX);
-	cudaFree(_devBasePointY);
-	cudaFree(_devPrivate);
-	cudaFree(_devChain);
-
-	_devChain = NULL;
-	_devBasePointX = NULL;
-	_devBasePointY = NULL;
-	_devPrivate = NULL;
-}
-
-cudaError_t CudaDeviceKeys::doStep()
-{
-	multiplyStepKernel <<<_blocks, _threads>>>(_devPrivate, _pointsPerThread, _step, _devChain, _devBasePointX, _devBasePointY);
-
-	// Wait for kernel to complete
-    cudaError_t err = cudaDeviceSynchronize();
-	fflush(stdout);
-	_step++;
-	return err;
-}
-
-__global__ void multiplyStepKernel(const unsigned int *privateKeys, int pointsPerThread, int step, unsigned int *chain, const unsigned int *gxPtr, const unsigned int *gyPtr)
-{
-	unsigned int *xPtr = ec::getXPtr();
-
-	unsigned int *yPtr = ec::getYPtr();
-
-	unsigned int gx[8];
-	unsigned int gy[8];
-
-	for(int i = 0; i < 8; i++) {
-		gx[i] = gxPtr[step * 8 + i];
-		gy[i] = gyPtr[step * 8 + i];
-	}
-
-	// Multiply together all (_Gx - x) and then invert
-	unsigned int inverse[8] = { 0,0,0,0,0,0,0,1 };
-
-	int batchIdx = 0;
-	for(int i = 0; i < pointsPerThread; i++) {
-
-		unsigned int p[8];
-		readInt(privateKeys, i, p);
-		unsigned int bit = p[7 - step / 32] & 1 << ((step % 32));
-		
-		unsigned int x[8];
-		readInt(xPtr, i, x);
-
-		if(bit != 0) {
-			if(!isInfinity(x)) {
-				beginBatchAddWithDouble(gx, gy, xPtr, chain, i, batchIdx, inverse);
-				batchIdx++;
-			}
-		}
-	}
-
-	doBatchInverse(inverse);
-
-	for(int i = pointsPerThread - 1; i >= 0; i--) {
-
-		unsigned int newX[8];
-		unsigned int newY[8];
-
-		unsigned int p[8];
-		readInt(privateKeys, i, p);
-		unsigned int bit = p[7 - step / 32] & 1 << ((step % 32));
-
-		unsigned int x[8];
-		readInt(xPtr, i, x);
-
-		bool infinity = isInfinity(x);
-
-		if(bit != 0) {
-			if(!infinity) {
-				batchIdx--;
-				completeBatchAddWithDouble(gx, gy, xPtr, yPtr, i, batchIdx, chain, inverse, newX, newY);
-			} else {
-				copyBigInt(gx, newX);
-				copyBigInt(gy, newY);
-			}
-
-			writeInt(xPtr, i, newX);
-			writeInt(yPtr, i, newY);
-		}
-	}
-}
-
-bool CudaDeviceKeys::selfTest(const std::vector<secp256k1::uint256> &privateKeys)
-{
-	unsigned int numPoints = _threads * _blocks * _pointsPerThread;
-
-	unsigned int *xBuf = new unsigned int[numPoints * 8];
-	unsigned int *yBuf = new unsigned int[numPoints * 8];
-
-	cudaError_t err = cudaMemcpy(xBuf, _devX, sizeof(unsigned int) * 8 * numPoints, cudaMemcpyDeviceToHost);
-
-	err = cudaMemcpy(yBuf, _devY, sizeof(unsigned int) * 8 * numPoints, cudaMemcpyDeviceToHost);
-
-
-	for(int block = 0; block < _blocks; block++) {
-		for(int thread = 0; thread < _threads; thread++) {
-			for(int idx = 0; idx < _pointsPerThread; idx++) {
-
-				int index = getIndex(block, thread, idx);
-
-				secp256k1::uint256 privateKey = privateKeys[index];
-
-				secp256k1::uint256 x = readBigInt(xBuf, block, thread, idx);
-				secp256k1::uint256 y = readBigInt(yBuf, block, thread, idx);
-
-				secp256k1::ecpoint p1(x, y);
-				secp256k1::ecpoint p2 = secp256k1::multiplyPoint(privateKey, secp256k1::G());
-
-				if(!secp256k1::pointExists(p1)) {
-					throw std::string("Validation failed: invalid point");
-				}
-
-				if(!secp256k1::pointExists(p2)) {
-					throw std::string("Validation failed: invalid point");
-				}
-
-				if(!(p1 == p2)) {
-					throw std::string("Validation failed: points do not match");
-				}
-			}
-		}
-	}
-
-	return true;
-}
\ No newline at end of file
diff --git a/CudaKeySearchDevice/CudaDeviceKeys.cuh b/CudaKeySearchDevice/CudaDeviceKeys.cuh
deleted file mode 100644
index 3758b1c..0000000
--- a/CudaKeySearchDevice/CudaDeviceKeys.cuh
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef _EC_CUH
-#define _EC_CUH
-
-#include <cuda_runtime.h>
-
-namespace ec {
-	__device__ unsigned int *getXPtr();
-
-	__device__ unsigned int *getYPtr();
-}
-
-#endif
\ No newline at end of file
diff --git a/CudaKeySearchDevice/CudaDeviceKeys.h b/CudaKeySearchDevice/CudaDeviceKeys.h
deleted file mode 100644
index f2407f1..0000000
--- a/CudaKeySearchDevice/CudaDeviceKeys.h
+++ /dev/null
@@ -1,81 +0,0 @@
-#ifndef _EC_H
-#define _EC_H
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-
-#include <vector>
-#include "secp256k1.h"
-
-
-class CudaDeviceKeys {
-
-private:
-	int _blocks;
-
-	int _threads;
-
-	int _pointsPerThread;
-
-	unsigned int _numKeys;
-
-	unsigned int *_devX;
-
-	unsigned int *_devY;
-
-	unsigned int *_devPrivate;
-
-	unsigned int *_devChain;
-
-	unsigned int *_devBasePointX;
-
-	unsigned int *_devBasePointY;
-
-	int _step;
-
-	int getIndex(int block, int thread, int idx);
-
-	void splatBigInt(unsigned int *dest, int block, int thread, int idx, const secp256k1::uint256 &i);
-
-	secp256k1::uint256 readBigInt(unsigned int *src, int block, int thread, int idx);
-
-	cudaError_t allocateChainBuf(unsigned int count);
-
-	cudaError_t initializePublicKeys(size_t count);
-
-	cudaError_t initializeBasePoints();
-
-
-public:
-
-	CudaDeviceKeys()
-	{
-		_numKeys = 0;
-		_devX = NULL;
-		_devY = NULL;
-		_devPrivate = NULL;
-		_devChain = NULL;
-		_devBasePointX = NULL;
-		_devBasePointY = NULL;
-		_step = 0;
-	}
-
-	~CudaDeviceKeys()
-	{
-		clearPublicKeys();
-		clearPrivateKeys();
-	}
-
-	cudaError_t init(int blocks, int threads, int pointsPerThread, const std::vector<secp256k1::uint256> &privateKeys);
-
-	bool selfTest(const std::vector<secp256k1::uint256> &privateKeys);
-
-	cudaError_t doStep();
-
-	void clearPrivateKeys();
-
-	void clearPublicKeys();
-
-};
-
-#endif
\ No newline at end of file
diff --git a/CudaKeySearchDevice/CudaHashLookup.cu b/CudaKeySearchDevice/CudaHashLookup.cu
deleted file mode 100644
index ce99ef2..0000000
--- a/CudaKeySearchDevice/CudaHashLookup.cu
+++ /dev/null
@@ -1,306 +0,0 @@
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <math.h>
-#include <vector>
-
-#include "KeySearchDevice.h"
-
-#include "CudaHashLookup.h"
-
-#include "CudaHashLookup.cuh"
-
-#include "Logger.h"
-
-#include "util.h"
-
-#define MAX_TARGETS_CONSTANT_MEM 16
-
-__constant__ unsigned int _TARGET_HASH[MAX_TARGETS_CONSTANT_MEM][5];
-__constant__ unsigned int _NUM_TARGET_HASHES[1];
-__constant__ unsigned int *_BLOOM_FILTER[1];
-__constant__ unsigned int _BLOOM_FILTER_MASK[1];
-__constant__ unsigned long long _BLOOM_FILTER_MASK64[1];
-
-__constant__ unsigned int _USE_BLOOM_FILTER[1];
-
-
-static unsigned int swp(unsigned int x)
-{
-	return (x << 24) | ((x << 8) & 0x00ff0000) | ((x >> 8) & 0x0000ff00) | (x >> 24);
-}
-
-static void undoRMD160FinalRound(const unsigned int hIn[5], unsigned int hOut[5])
-{
-	unsigned int iv[5] = {
-		0x67452301,
-		0xefcdab89,
-		0x98badcfe,
-		0x10325476,
-		0xc3d2e1f0
-	};
-
-	for(int i = 0; i < 5; i++) {
-		hOut[i] = swp(hIn[i]) - iv[(i + 1) % 5];
-	}
-}
-
-/**
-Copies the target hashes to constant memory
-*/
-cudaError_t CudaHashLookup::setTargetConstantMemory(const std::vector<struct hash160> &targets)
-{
-	size_t count = targets.size();
-
-	for(size_t i = 0; i < count; i++) {
-		unsigned int h[5];
-
-		undoRMD160FinalRound(targets[i].h, h);
-
-		cudaError_t err = cudaMemcpyToSymbol(_TARGET_HASH, h, sizeof(unsigned int) * 5, i * sizeof(unsigned int) * 5);
-
-		if(err) {
-			return err;
-		}
-	}
-
-	cudaError_t err = cudaMemcpyToSymbol(_NUM_TARGET_HASHES, &count, sizeof(unsigned int));
-	if(err) {
-		return err;
-	}
-
-	unsigned int useBloomFilter = 0;
-
-	err = cudaMemcpyToSymbol(_USE_BLOOM_FILTER, &useBloomFilter, sizeof(bool));
-	if(err) {
-		return err;
-	}
-
-	return cudaSuccess;
-}
-
-/**
-Returns the optimal bloom filter size in bits given the probability of false-positives and the
-number of hash functions
-*/
-unsigned int CudaHashLookup::getOptimalBloomFilterBits(double p, size_t n)
-{
-	double m = 3.6 * ceil((n * log(p)) / log(1 / pow(2, log(2))));
-
-	return (unsigned int)ceil(log(m) / log(2));
-}
-
-void CudaHashLookup::initializeBloomFilter(const std::vector<struct hash160> &targets, unsigned int *filter, unsigned int mask)
-{
-	// Use the low 16 bits of each word in the hash as the index into the bloom filter
-	for(unsigned int i = 0; i < targets.size(); i++) {
-
-		unsigned int h[5];
-
-		undoRMD160FinalRound(targets[i].h, h);
-
-		for(int j = 0; j < 5; j++) {
-			unsigned int idx = h[j] & mask;
-
-			filter[idx / 32] |= (0x01 << (idx % 32));
-		}
-
-	}
-}
-
-void CudaHashLookup::initializeBloomFilter64(const std::vector<struct hash160> &targets, unsigned int *filter, unsigned long long mask)
-{
-	for(unsigned int k = 0; k < targets.size(); k++) {
-
-		unsigned int hash[5];
-
-		unsigned long long idx[5];
-
-		undoRMD160FinalRound(targets[k].h, hash);
-
-		idx[0] = ((unsigned long long)hash[0] << 32 | hash[1]) & mask;
-		idx[1] = ((unsigned long long)hash[2] << 32 | hash[3]) & mask;
-		idx[2] = ((unsigned long long)(hash[0]^hash[1]) << 32 | (hash[1]^hash[2])) & mask;
-		idx[3] = ((unsigned long long)(hash[2]^hash[3]) << 32 | (hash[3] ^ hash[4])) & mask;
-		idx[4] = ((unsigned long long)(hash[0]^hash[3]) << 32 | (hash[1]^hash[3])) & mask;
-
-		for(int i = 0; i < 5; i++) {
-
-			filter[idx[i] / 32] |= (0x01 << (idx[i] % 32));
-		}
-	}
-}
-
-/**
-Populates the bloom filter with the target hashes
-*/
-cudaError_t CudaHashLookup::setTargetBloomFilter(const std::vector<struct hash160> &targets)
-{
-	unsigned int bloomFilterBits = getOptimalBloomFilterBits(1.0e-9, targets.size());
-
-	unsigned long long bloomFilterSizeWords = (unsigned long long)1 << (bloomFilterBits - 5);
-	unsigned long long bloomFilterBytes = (unsigned long long)1 << (bloomFilterBits - 3);
-	unsigned long long bloomFilterMask = (((unsigned long long)1 << bloomFilterBits) - 1);
-
-	Logger::log(LogLevel::Info, "Allocating bloom filter (" + util::format("%.1f", (double)bloomFilterBytes/(double)(1024*1024)) + "MB)");
-
-	unsigned int *filter = NULL;
-	
-	try {
-		filter = new unsigned int[bloomFilterSizeWords];
-	} catch(std::bad_alloc) {
-		Logger::log(LogLevel::Error, "Out of system memory");
-
-		return cudaErrorMemoryAllocation;
-	}
-
-	cudaError_t err = cudaMalloc(&_bloomFilterPtr, bloomFilterBytes);
-
-	if(err) {
-		Logger::log(LogLevel::Error, "Device error: " + std::string(cudaGetErrorString(err)));
-		delete[] filter;
-		return err;
-	}
-
-	memset(filter, 0, sizeof(unsigned int) * bloomFilterSizeWords);
-	if(bloomFilterBits > 32) {
-		initializeBloomFilter64(targets, filter, bloomFilterMask);
-	} else {
-		initializeBloomFilter(targets, filter, (unsigned int)bloomFilterMask);
-	}
-
-	// Copy to device
-	err = cudaMemcpy(_bloomFilterPtr, filter, sizeof(unsigned int) * bloomFilterSizeWords, cudaMemcpyHostToDevice);
-	if(err) {
-		cudaFree(_bloomFilterPtr);
-		_bloomFilterPtr = NULL;
-		delete[] filter;
-		return err;
-	}
-
-	// Copy device memory pointer to constant memory
-	err = cudaMemcpyToSymbol(_BLOOM_FILTER, &_bloomFilterPtr, sizeof(unsigned int *));
-	if(err) {
-		cudaFree(_bloomFilterPtr);
-		_bloomFilterPtr = NULL;
-		delete[] filter;
-		return err;
-	}
-
-	// Copy device memory pointer to constant memory
-	if(bloomFilterBits <= 32) {
-		err = cudaMemcpyToSymbol(_BLOOM_FILTER_MASK, &bloomFilterMask, sizeof(unsigned int *));
-		if(err) {
-			cudaFree(_bloomFilterPtr);
-			_bloomFilterPtr = NULL;
-			delete[] filter;
-			return err;
-		}
-	} else {
-		err = cudaMemcpyToSymbol(_BLOOM_FILTER_MASK64, &bloomFilterMask, sizeof(unsigned long long *));
-		if(err) {
-			cudaFree(_bloomFilterPtr);
-			_bloomFilterPtr = NULL;
-			delete[] filter;
-			return err;
-		}
-	}
-
-	unsigned int useBloomFilter = bloomFilterBits <= 32 ? 1 : 2;
-
-	err = cudaMemcpyToSymbol(_USE_BLOOM_FILTER, &useBloomFilter, sizeof(unsigned int));
-
-	delete[] filter;
-
-	return err;
-}
-
-/**
-*Copies the target hashes to either constant memory, or the bloom filter depending
-on how many targets there are
-*/
-cudaError_t CudaHashLookup::setTargets(const std::vector<struct hash160> &targets)
-{
-	cleanup();
-
-	if(targets.size() <= MAX_TARGETS_CONSTANT_MEM) {
-		return setTargetConstantMemory(targets);
-	} else {
-		return setTargetBloomFilter(targets);
-	}
-}
-
-void CudaHashLookup::cleanup()
-{
-	if(_bloomFilterPtr != NULL) {
-		cudaFree(_bloomFilterPtr);
-		_bloomFilterPtr = NULL;
-	}
-}
-
-__device__ bool checkBloomFilter(const unsigned int hash[5])
-{
-	bool foundMatch = true;
-
-	unsigned int mask = _BLOOM_FILTER_MASK[0];
-	unsigned int *bloomFilter = _BLOOM_FILTER[0];
-
-	for(int i = 0; i < 5; i++) {
-        unsigned int idx = hash[i] & mask;
-
-        unsigned int f = bloomFilter[idx / 32];
-
-		if((f & (0x01 << (idx % 32))) == 0) {
-			foundMatch = false;
-		}
-	}
-
-	return foundMatch;
-}
-
-__device__ bool checkBloomFilter64(const unsigned int hash[5])
-{
-	bool foundMatch = true;
-
-	unsigned long long mask = _BLOOM_FILTER_MASK64[0];
-	unsigned int *bloomFilter = _BLOOM_FILTER[0];
-	unsigned long long idx[5];
-
-	idx[0] = ((unsigned long long)hash[0] << 32 | hash[1]) & mask;
-	idx[1] = ((unsigned long long)hash[2] << 32 | hash[3]) & mask;
-	idx[2] = ((unsigned long long)(hash[0] ^ hash[1]) << 32 | (hash[1] ^ hash[2])) & mask;
-	idx[3] = ((unsigned long long)(hash[2] ^ hash[3]) << 32 | (hash[3] ^ hash[4])) & mask;
-	idx[4] = ((unsigned long long)(hash[0] ^ hash[3]) << 32 | (hash[1] ^ hash[3])) & mask;
-
-	for(int i = 0; i < 5; i++) {
-		unsigned int f = bloomFilter[idx[i] / 32];
-
-		if((f & (0x01 << (idx[i] % 32))) == 0) {
-			foundMatch = false;
-		}
-	}
-
-	return foundMatch;
-}
-
-
-__device__ bool checkHash(const unsigned int hash[5])
-{
-	bool foundMatch = false;
-
-	if(*_USE_BLOOM_FILTER == 1) {
-		return checkBloomFilter(hash);
-	} else if(*_USE_BLOOM_FILTER == 2) {
-		return checkBloomFilter64(hash);
-	} else {
-		for(int j = 0; j < *_NUM_TARGET_HASHES; j++) {
-			bool equal = true;
-			for(int i = 0; i < 5; i++) {
-				equal &= (hash[i] == _TARGET_HASH[j][i]);
-			}
-
-			foundMatch |= equal;
-		}
-	}
-
-	return foundMatch;
-}
\ No newline at end of file
diff --git a/CudaKeySearchDevice/CudaHashLookup.cuh b/CudaKeySearchDevice/CudaHashLookup.cuh
deleted file mode 100644
index 83b3982..0000000
--- a/CudaKeySearchDevice/CudaHashLookup.cuh
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ADDRESS_LOOKUP_CUH
-#define _ADDRESS_LOOKUP_CUH
-
-__device__ bool checkHash(const unsigned int hash[5]);
-
-#endif
\ No newline at end of file
diff --git a/CudaKeySearchDevice/CudaHashLookup.h b/CudaKeySearchDevice/CudaHashLookup.h
deleted file mode 100644
index 8e8d87e..0000000
--- a/CudaKeySearchDevice/CudaHashLookup.h
+++ /dev/null
@@ -1,38 +0,0 @@
-#ifndef _HASH_LOOKUP_HOST_H
-#define _HASH_LOOKUP_HOST_H
-
-#include <cuda_runtime.h>
-
-class CudaHashLookup {
-
-private:
-	unsigned int *_bloomFilterPtr;
-
-	cudaError_t setTargetBloomFilter(const std::vector<struct hash160> &targets);
-	
-	cudaError_t setTargetConstantMemory(const std::vector<struct hash160> &targets);
-	
-	unsigned int getOptimalBloomFilterBits(double p, size_t n);
-
-	void cleanup();
-
-	void initializeBloomFilter(const std::vector<struct hash160> &targets, unsigned int *filter, unsigned int mask);
-	
-	void initializeBloomFilter64(const std::vector<struct hash160> &targets, unsigned int *filter, unsigned long long mask);
-
-public:
-
-	CudaHashLookup()
-	{
-		_bloomFilterPtr = NULL;
-	}
-
-	~CudaHashLookup()
-	{
-		cleanup();
-	}
-
-	cudaError_t setTargets(const std::vector<struct hash160> &targets);
-};
-
-#endif
\ No newline at end of file
diff --git a/CudaKeySearchDevice/CudaKeySearchDevice.cpp b/CudaKeySearchDevice/CudaKeySearchDevice.cpp
deleted file mode 100644
index aad1fd3..0000000
--- a/CudaKeySearchDevice/CudaKeySearchDevice.cpp
+++ /dev/null
@@ -1,316 +0,0 @@
-#include "CudaKeySearchDevice.h"
-#include "Logger.h"
-#include "util.h"
-#include "cudabridge.h"
-#include "AddressUtil.h"
-
-void CudaKeySearchDevice::cudaCall(cudaError_t err)
-{
-    if(err) {
-        std::string errStr = cudaGetErrorString(err);
-
-        throw KeySearchException(errStr);
-    }
-}
-
-CudaKeySearchDevice::CudaKeySearchDevice(int device, int threads, int pointsPerThread, int blocks)
-{
-    cuda::CudaDeviceInfo info;
-    try {
-        info = cuda::getDeviceInfo(device);
-        _deviceName = info.name;
-    } catch(cuda::CudaException ex) {
-        throw KeySearchException(ex.msg);
-    }
-
-    if(threads <= 0 || threads % 32 != 0) {
-        throw KeySearchException("The number of threads must be a multiple of 32");
-    }
-
-    if(pointsPerThread <= 0) {
-        throw KeySearchException("At least 1 point per thread required");
-    }
-
-    // Specifying blocks on the commandline is depcreated but still supported. If there is no value for
-    // blocks, devide the threads evenly among the multi-processors
-    if(blocks == 0) {
-        if(threads % info.mpCount != 0) {
-            throw KeySearchException("The number of threads must be a multiple of " + util::format("%d", info.mpCount));
-        }
-
-        _threads = threads / info.mpCount;
-
-        _blocks = info.mpCount;
-
-        while(_threads > 512) {
-            _threads /= 2;
-            _blocks *= 2;
-        }
-    } else {
-        _threads = threads;
-        _blocks = blocks;
-    }
-
-    _iterations = 0;
-
-    _device = device;
-
-    _pointsPerThread = pointsPerThread;
-}
-
-void CudaKeySearchDevice::init(const secp256k1::uint256 &start, int compression, const secp256k1::uint256 &stride)
-{
-    if(start.cmp(secp256k1::N) >= 0) {
-        throw KeySearchException("Starting key is out of range");
-    }
-
-    _startExponent = start;
-
-    _compression = compression;
-
-    _stride = stride;
-
-    cudaCall(cudaSetDevice(_device));
-
-    // Block on kernel calls
-    cudaCall(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
-
-    // Use a larger portion of shared memory for L1 cache
-    cudaCall(cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
-
-    generateStartingPoints();
-
-    cudaCall(allocateChainBuf(_threads * _blocks * _pointsPerThread));
-
-    // Set the incrementor
-    secp256k1::ecpoint g = secp256k1::G();
-    secp256k1::ecpoint p = secp256k1::multiplyPoint(secp256k1::uint256((uint64_t)_threads * _blocks * _pointsPerThread) * _stride, g);
-
-    cudaCall(_resultList.init(sizeof(CudaDeviceResult), 16));
-
-    cudaCall(setIncrementorPoint(p.x, p.y));
-}
-
-
-void CudaKeySearchDevice::generateStartingPoints()
-{
-    uint64_t totalPoints = (uint64_t)_pointsPerThread * _threads * _blocks;
-    uint64_t totalMemory = totalPoints * 40;
-
-    std::vector<secp256k1::uint256> exponents;
-
-    Logger::log(LogLevel::Info, "Generating " + util::formatThousands(totalPoints) + " starting points (" + util::format("%.1f", (double)totalMemory / (double)(1024 * 1024)) + "MB)");
-
-    // Generate key pairs for k, k+1, k+2 ... k + <total points in parallel - 1>
-    secp256k1::uint256 privKey = _startExponent;
-
-    exponents.push_back(privKey);
-
-    for(uint64_t i = 1; i < totalPoints; i++) {
-        privKey = privKey.add(_stride);
-        exponents.push_back(privKey);
-    }
-
-    cudaCall(_deviceKeys.init(_blocks, _threads, _pointsPerThread, exponents));
-
-    // Show progress in 10% increments
-    double pct = 10.0;
-    for(int i = 1; i <= 256; i++) {
-        cudaCall(_deviceKeys.doStep());
-
-        if(((double)i / 256.0) * 100.0 >= pct) {
-            Logger::log(LogLevel::Info, util::format("%.1f%%", pct));
-            pct += 10.0;
-        }
-    }
-
-    Logger::log(LogLevel::Info, "Done");
-
-    _deviceKeys.clearPrivateKeys();
-}
-
-
-void CudaKeySearchDevice::setTargets(const std::set<KeySearchTarget> &targets)
-{
-    _targets.clear();
-    
-    for(std::set<KeySearchTarget>::iterator i = targets.begin(); i != targets.end(); ++i) {
-        hash160 h(i->value);
-        _targets.push_back(h);
-    }
-
-    cudaCall(_targetLookup.setTargets(_targets));
-}
-
-void CudaKeySearchDevice::doStep()
-{
-    uint64_t numKeys = (uint64_t)_blocks * _threads * _pointsPerThread;
-
-    try {
-        if(_iterations < 2 && _startExponent.cmp(numKeys) <= 0) {
-            callKeyFinderKernel(_blocks, _threads, _pointsPerThread, true, _compression);
-        } else {
-            callKeyFinderKernel(_blocks, _threads, _pointsPerThread, false, _compression);
-        }
-    } catch(cuda::CudaException ex) {
-        throw KeySearchException(ex.msg);
-    }
-
-    getResultsInternal();
-
-    _iterations++;
-}
-
-uint64_t CudaKeySearchDevice::keysPerStep()
-{
-    return (uint64_t)_blocks * _threads * _pointsPerThread;
-}
-
-std::string CudaKeySearchDevice::getDeviceName()
-{
-    return _deviceName;
-}
-
-void CudaKeySearchDevice::getMemoryInfo(uint64_t &freeMem, uint64_t &totalMem)
-{
-    cudaCall(cudaMemGetInfo(&freeMem, &totalMem));
-}
-
-void CudaKeySearchDevice::removeTargetFromList(const unsigned int hash[5])
-{
-    size_t count = _targets.size();
-
-    while(count) {
-        if(memcmp(hash, _targets[count - 1].h, 20) == 0) {
-            _targets.erase(_targets.begin() + count - 1);
-            return;
-        }
-        count--;
-    }
-}
-
-bool CudaKeySearchDevice::isTargetInList(const unsigned int hash[5])
-{
-    size_t count = _targets.size();
-
-    while(count) {
-        if(memcmp(hash, _targets[count - 1].h, 20) == 0) {
-            return true;
-        }
-        count--;
-    }
-
-    return false;
-}
-
-uint32_t CudaKeySearchDevice::getPrivateKeyOffset(int thread, int block, int idx)
-{
-    // Total number of threads
-    int totalThreads = _blocks * _threads;
-
-    int base = idx * totalThreads;
-
-    // Global ID of the current thread
-    int threadId = block * _threads + thread;
-
-    return base + threadId;
-}
-
-void CudaKeySearchDevice::getResultsInternal()
-{
-    int count = _resultList.size();
-    int actualCount = 0;
-    if(count == 0) {
-        return;
-    }
-
-    unsigned char *ptr = new unsigned char[count * sizeof(CudaDeviceResult)];
-
-    _resultList.read(ptr, count);
-
-    for(int i = 0; i < count; i++) {
-        struct CudaDeviceResult *rPtr = &((struct CudaDeviceResult *)ptr)[i];
-
-        // might be false-positive
-        if(!isTargetInList(rPtr->digest)) {
-            continue;
-        }
-        actualCount++;
-
-        KeySearchResult minerResult;
-
-        // Calculate the private key based on the number of iterations and the current thread
-        secp256k1::uint256 offset = (secp256k1::uint256((uint64_t)_blocks * _threads * _pointsPerThread * _iterations) + secp256k1::uint256(getPrivateKeyOffset(rPtr->thread, rPtr->block, rPtr->idx))) * _stride;
-        secp256k1::uint256 privateKey = secp256k1::addModN(_startExponent, offset);
-
-        minerResult.privateKey = privateKey;
-        minerResult.compressed = rPtr->compressed;
-
-        memcpy(minerResult.hash, rPtr->digest, 20);
-
-        minerResult.publicKey = secp256k1::ecpoint(secp256k1::uint256(rPtr->x, secp256k1::uint256::BigEndian), secp256k1::uint256(rPtr->y, secp256k1::uint256::BigEndian));
-
-        removeTargetFromList(rPtr->digest);
-
-        _results.push_back(minerResult);
-    }
-
-    delete[] ptr;
-
-    _resultList.clear();
-
-    // Reload the bloom filters
-    if(actualCount) {
-        cudaCall(_targetLookup.setTargets(_targets));
-    }
-}
-
-// Verify a private key produces the public key and hash
-bool CudaKeySearchDevice::verifyKey(const secp256k1::uint256 &privateKey, const secp256k1::ecpoint &publicKey, const unsigned int hash[5], bool compressed)
-{
-    secp256k1::ecpoint g = secp256k1::G();
-
-    secp256k1::ecpoint p = secp256k1::multiplyPoint(privateKey, g);
-
-    if(!(p == publicKey)) {
-        return false;
-    }
-
-    unsigned int xWords[8];
-    unsigned int yWords[8];
-
-    p.x.exportWords(xWords, 8, secp256k1::uint256::BigEndian);
-    p.y.exportWords(yWords, 8, secp256k1::uint256::BigEndian);
-
-    unsigned int digest[5];
-    if(compressed) {
-        Hash::hashPublicKeyCompressed(xWords, yWords, digest);
-    } else {
-        Hash::hashPublicKey(xWords, yWords, digest);
-    }
-
-    for(int i = 0; i < 5; i++) {
-        if(digest[i] != hash[i]) {
-            return false;
-        }
-    }
-
-    return true;
-}
-
-size_t CudaKeySearchDevice::getResults(std::vector<KeySearchResult> &resultsOut)
-{
-    for(int i = 0; i < _results.size(); i++) {
-        resultsOut.push_back(_results[i]);
-    }
-    _results.clear();
-
-    return resultsOut.size();
-}
-
-secp256k1::uint256 CudaKeySearchDevice::getNextKey()
-{
-    uint64_t totalPoints = (uint64_t)_pointsPerThread * _threads * _blocks;
-
-    return _startExponent + secp256k1::uint256(totalPoints) * _iterations * _stride;
-}
\ No newline at end of file
diff --git a/CudaKeySearchDevice/CudaKeySearchDevice.cu b/CudaKeySearchDevice/CudaKeySearchDevice.cu
deleted file mode 100644
index cbb79ad..0000000
--- a/CudaKeySearchDevice/CudaKeySearchDevice.cu
+++ /dev/null
@@ -1,261 +0,0 @@
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <device_launch_parameters.h>
-#include "KeySearchTypes.h"
-#include "CudaKeySearchDevice.h"
-#include "ptx.cuh"
-#include "secp256k1.cuh"
-
-#include "sha256.cuh"
-#include "ripemd160.cuh"
-
-#include "secp256k1.h"
-
-#include "CudaHashLookup.cuh"
-#include "CudaAtomicList.cuh"
-#include "CudaDeviceKeys.cuh"
-
-__constant__ unsigned int _INC_X[8];
-
-__constant__ unsigned int _INC_Y[8];
-
-__constant__ unsigned int *_CHAIN[1];
-
-static unsigned int *_chainBufferPtr = NULL;
-
-
-__device__ void doRMD160FinalRound(const unsigned int hIn[5], unsigned int hOut[5])
-{
-    const unsigned int iv[5] = {
-        0x67452301,
-        0xefcdab89,
-        0x98badcfe,
-        0x10325476,
-        0xc3d2e1f0
-    };
-
-    for(int i = 0; i < 5; i++) {
-        hOut[i] = endian(hIn[i] + iv[(i + 1) % 5]);
-    }
-}
-
-
-/**
- * Allocates device memory for storing the multiplication chain used in
- the batch inversion operation
- */
-cudaError_t allocateChainBuf(unsigned int count)
-{
-    cudaError_t err = cudaMalloc(&_chainBufferPtr, count * sizeof(unsigned int) * 8);
-
-    if(err) {
-        return err;
-    }
-
-    err = cudaMemcpyToSymbol(_CHAIN, &_chainBufferPtr, sizeof(unsigned int *));
-    if(err) {
-        cudaFree(_chainBufferPtr);
-    }
-
-    return err;
-}
-
-void cleanupChainBuf()
-{
-    if(_chainBufferPtr != NULL) {
-        cudaFree(_chainBufferPtr);
-        _chainBufferPtr = NULL;
-    }
-}
-
-/**
- *Sets the EC point which all points will be incremented by
- */
-cudaError_t setIncrementorPoint(const secp256k1::uint256 &x, const secp256k1::uint256 &y)
-{
-    unsigned int xWords[8];
-    unsigned int yWords[8];
-
-    x.exportWords(xWords, 8, secp256k1::uint256::BigEndian);
-    y.exportWords(yWords, 8, secp256k1::uint256::BigEndian);
-
-    cudaError_t err = cudaMemcpyToSymbol(_INC_X, xWords, sizeof(unsigned int) * 8);
-    if(err) {
-        return err;
-    }
-
-    return cudaMemcpyToSymbol(_INC_Y, yWords, sizeof(unsigned int) * 8);
-}
-
-
-
-__device__ void hashPublicKey(const unsigned int *x, const unsigned int *y, unsigned int *digestOut)
-{
-    unsigned int hash[8];
-
-    sha256PublicKey(x, y, hash);
-
-    // Swap to little-endian
-    for(int i = 0; i < 8; i++) {
-        hash[i] = endian(hash[i]);
-    }
-
-    ripemd160sha256NoFinal(hash, digestOut);
-}
-
-__device__ void hashPublicKeyCompressed(const unsigned int *x, unsigned int yParity, unsigned int *digestOut)
-{
-    unsigned int hash[8];
-
-    sha256PublicKeyCompressed(x, yParity, hash);
-
-    // Swap to little-endian
-    for(int i = 0; i < 8; i++) {
-        hash[i] = endian(hash[i]);
-    }
-
-    ripemd160sha256NoFinal(hash, digestOut);
-}
-
-
-__device__ void setResultFound(int idx, bool compressed, unsigned int x[8], unsigned int y[8], unsigned int digest[5])
-{
-    CudaDeviceResult r;
-
-    r.block = blockIdx.x;
-    r.thread = threadIdx.x;
-    r.idx = idx;
-    r.compressed = compressed;
-
-    for(int i = 0; i < 8; i++) {
-        r.x[i] = x[i];
-        r.y[i] = y[i];
-    }
-
-    doRMD160FinalRound(digest, r.digest);
-
-    atomicListAdd(&r, sizeof(r));
-}
-
-__device__ void doIteration(int pointsPerThread, int compression)
-{
-    unsigned int *chain = _CHAIN[0];
-    unsigned int *xPtr = ec::getXPtr();
-    unsigned int *yPtr = ec::getYPtr();
-
-    // Multiply together all (_Gx - x) and then invert
-    unsigned int inverse[8] = {0,0,0,0,0,0,0,1};
-    for(int i = 0; i < pointsPerThread; i++) {
-        unsigned int x[8];
-
-        unsigned int digest[5];
-
-        readInt(xPtr, i, x);
-
-        if(compression == PointCompressionType::UNCOMPRESSED || compression == PointCompressionType::BOTH) {
-            unsigned int y[8];
-            readInt(yPtr, i, y);
-
-            hashPublicKey(x, y, digest);
-
-            if(checkHash(digest)) {
-                setResultFound(i, false, x, y, digest);
-            }
-        }
-
-        if(compression == PointCompressionType::COMPRESSED || compression == PointCompressionType::BOTH) {
-            hashPublicKeyCompressed(x, readIntLSW(yPtr, i), digest);
-
-            if(checkHash(digest)) {
-                unsigned int y[8];
-                readInt(yPtr, i, y);
-                setResultFound(i, true, x, y, digest);
-            }
-        }
-
-        beginBatchAdd(_INC_X, x, chain, i, i, inverse);
-    }
-
-    doBatchInverse(inverse);
-
-    for(int i = pointsPerThread - 1; i >= 0; i--) {
-
-        unsigned int newX[8];
-        unsigned int newY[8];
-
-        completeBatchAdd(_INC_X, _INC_Y, xPtr, yPtr, i, i, chain, inverse, newX, newY);
-
-        writeInt(xPtr, i, newX);
-        writeInt(yPtr, i, newY);
-    }
-}
-
-__device__ void doIterationWithDouble(int pointsPerThread, int compression)
-{
-    unsigned int *chain = _CHAIN[0];
-    unsigned int *xPtr = ec::getXPtr();
-    unsigned int *yPtr = ec::getYPtr();
-
-    // Multiply together all (_Gx - x) and then invert
-    unsigned int inverse[8] = {0,0,0,0,0,0,0,1};
-    for(int i = 0; i < pointsPerThread; i++) {
-        unsigned int x[8];
-
-        unsigned int digest[5];
-
-        readInt(xPtr, i, x);
-
-        // uncompressed
-        if(compression == PointCompressionType::UNCOMPRESSED || compression == PointCompressionType::BOTH) {
-            unsigned int y[8];
-            readInt(yPtr, i, y);
-            hashPublicKey(x, y, digest);
-
-            if(checkHash(digest)) {
-                setResultFound(i, false, x, y, digest);
-            }
-        }
-
-        // compressed
-        if(compression == PointCompressionType::COMPRESSED || compression == PointCompressionType::BOTH) {
-
-            hashPublicKeyCompressed(x, readIntLSW(yPtr, i), digest);
-
-            if(checkHash(digest)) {
-
-                unsigned int y[8];
-                readInt(yPtr, i, y);
-
-                setResultFound(i, true, x, y, digest);
-            }
-        }
-
-        beginBatchAddWithDouble(_INC_X, _INC_Y, xPtr, chain, i, i, inverse);
-    }
-
-    doBatchInverse(inverse);
-
-    for(int i = pointsPerThread - 1; i >= 0; i--) {
-
-        unsigned int newX[8];
-        unsigned int newY[8];
-
-        completeBatchAddWithDouble(_INC_X, _INC_Y, xPtr, yPtr, i, i, chain, inverse, newX, newY);
-
-        writeInt(xPtr, i, newX);
-        writeInt(yPtr, i, newY);
-    }
-}
-
-/**
-* Performs a single iteration
-*/
-__global__ void keyFinderKernel(int points, int compression)
-{
-    doIteration(points, compression);
-}
-
-__global__ void keyFinderKernelWithDouble(int points, int compression)
-{
-    doIterationWithDouble(points, compression);
-}
\ No newline at end of file
diff --git a/CudaKeySearchDevice/CudaKeySearchDevice.h b/CudaKeySearchDevice/CudaKeySearchDevice.h
deleted file mode 100644
index fb8d194..0000000
--- a/CudaKeySearchDevice/CudaKeySearchDevice.h
+++ /dev/null
@@ -1,91 +0,0 @@
-#ifndef _CUDA_KEY_SEARCH_DEVICE
-#define _CUDA_KEY_SEARCH_DEVICE
-
-#include "KeySearchDevice.h"
-#include <vector>
-#include <cuda_runtime.h>
-#include "secp256k1.h"
-#include "CudaDeviceKeys.h"
-#include "CudaHashLookup.h"
-#include "CudaAtomicList.h"
-#include "cudaUtil.h"
-
-// Structures that exist on both host and device side
-struct CudaDeviceResult {
-    int thread;
-    int block;
-    int idx;
-    bool compressed;
-    unsigned int x[8];
-    unsigned int y[8];
-    unsigned int digest[5];
-};
-
-class CudaKeySearchDevice : public KeySearchDevice {
-
-private:
-
-    int _device;
-
-    int _blocks;
-
-    int _threads;
-
-    int _pointsPerThread;
-
-    int _compression;
-
-    std::vector<KeySearchResult> _results;
-
-    std::string _deviceName;
-
-    secp256k1::uint256 _startExponent;
-
-    uint64_t _iterations;
-
-    void cudaCall(cudaError_t err);
-
-    void generateStartingPoints();
-
-    CudaDeviceKeys _deviceKeys;
-
-    CudaAtomicList _resultList;
-
-    CudaHashLookup _targetLookup;
-
-    void getResultsInternal();
-
-    std::vector<hash160> _targets;
-
-    bool isTargetInList(const unsigned int hash[5]);
-    
-    void removeTargetFromList(const unsigned int hash[5]);
-
-    uint32_t getPrivateKeyOffset(int thread, int block, int point);
-
-    secp256k1::uint256 _stride;
-
-    bool verifyKey(const secp256k1::uint256 &privateKey, const secp256k1::ecpoint &publicKey, const unsigned int hash[5], bool compressed);
-
-public:
-
-    CudaKeySearchDevice(int device, int threads, int pointsPerThread, int blocks = 0);
-
-    virtual void init(const secp256k1::uint256 &start, int compression, const secp256k1::uint256 &stride);
-
-    virtual void doStep();
-
-    virtual void setTargets(const std::set<KeySearchTarget> &targets);
-
-    virtual size_t getResults(std::vector<KeySearchResult> &results);
-
-    virtual uint64_t keysPerStep();
-
-    virtual std::string getDeviceName();
-
-    virtual void getMemoryInfo(uint64_t &freeMem, uint64_t &totalMem);
-
-    virtual secp256k1::uint256 getNextKey();
-};
-
-#endif
\ No newline at end of file
diff --git a/CudaKeySearchDevice/CudaKeySearchDevice.vcxproj b/CudaKeySearchDevice/CudaKeySearchDevice.vcxproj
deleted file mode 100644
index 490c53e..0000000
--- a/CudaKeySearchDevice/CudaKeySearchDevice.vcxproj
+++ /dev/null
@@ -1,119 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <ItemGroup Label="ProjectConfigurations">
-    <ProjectConfiguration Include="Debug|x64">
-      <Configuration>Debug</Configuration>
-      <Platform>x64</Platform>
-    </ProjectConfiguration>
-    <ProjectConfiguration Include="Release|x64">
-      <Configuration>Release</Configuration>
-      <Platform>x64</Platform>
-    </ProjectConfiguration>
-  </ItemGroup>
-  <ItemGroup>
-    <ClCompile Include="CudaKeySearchDevice.cpp" />
-  </ItemGroup>
-  <ItemGroup>
-    <ClInclude Include="CudaAtomicList.cuh" />
-    <ClInclude Include="CudaAtomicList.h" />
-    <ClInclude Include="cudabridge.h" />
-    <ClInclude Include="CudaKeySearchDevice.h" />
-    <ClInclude Include="CudaDeviceKeys.cuh" />
-    <ClInclude Include="CudaDeviceKeys.h" />
-    <ClInclude Include="CudaHashLookup.cuh" />
-    <ClInclude Include="CudaHashLookup.h" />
-  </ItemGroup>
-  <ItemGroup>
-    <CudaCompile Include="CudaAtomicList.cu" />
-    <CudaCompile Include="cudabridge.cu" />
-    <CudaCompile Include="CudaKeySearchDevice.cu" />
-    <CudaCompile Include="CudaDeviceKeys.cu" />
-    <CudaCompile Include="CudaHashLookup.cu" />
-  </ItemGroup>
-  <ItemGroup>
-    <ProjectReference Include="..\Logger\Logger.vcxproj">
-      <Project>{150af404-1f80-4a13-855b-4383c4a3326f}</Project>
-    </ProjectReference>
-  </ItemGroup>
-  <PropertyGroup Label="Globals">
-    <ProjectGuid>{CCA3D02C-5E5A-4A24-B34B-5961DFA93946}</ProjectGuid>
-    <RootNamespace>CudaKeySearchDevice</RootNamespace>
-    <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
-  </PropertyGroup>
-  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
-    <ConfigurationType>StaticLibrary</ConfigurationType>
-    <UseDebugLibraries>true</UseDebugLibraries>
-    <CharacterSet>MultiByte</CharacterSet>
-    <PlatformToolset>v142</PlatformToolset>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
-    <ConfigurationType>StaticLibrary</ConfigurationType>
-    <UseDebugLibraries>false</UseDebugLibraries>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
-    <CharacterSet>MultiByte</CharacterSet>
-    <PlatformToolset>v142</PlatformToolset>
-  </PropertyGroup>
-  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
-  <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 10.1.props" />
-  </ImportGroup>
-  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-    <Import Project="..\BitCrack.props" />
-  </ImportGroup>
-  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-    <Import Project="..\BitCrack.props" />
-  </ImportGroup>
-  <PropertyGroup Label="UserMacros" />
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <LinkIncremental>true</LinkIncremental>
-  </PropertyGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <ClCompile>
-      <WarningLevel>Level4</WarningLevel>
-      <Optimization>Disabled</Optimization>
-      <PreprocessorDefinitions>WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <AdditionalUsingDirectories>%(AdditionalUsingDirectories)</AdditionalUsingDirectories>
-      <AdditionalIncludeDirectories>$(SolutionDir)\secp256k1lib;$(SolutionDir)\KeyFinderLib;$(SolutionDir)\Logger;$(SolutionDir)\Util;$(SolutionDir)\CudaMath;$(SolutionDir)\cudaUtil;$(SolutionDir)\AddressUtil;$(CUDA_INCLUDE)</AdditionalIncludeDirectories>
-    </ClCompile>
-    <Link>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-      <SubSystem>Console</SubSystem>
-      <AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
-    </Link>
-    <CudaCompile>
-      <TargetMachinePlatform>64</TargetMachinePlatform>
-      <GenerateRelocatableDeviceCode>true</GenerateRelocatableDeviceCode>
-      <CodeGeneration>%(CodeGeneration)</CodeGeneration>
-    </CudaCompile>
-  </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
-    <ClCompile>
-      <WarningLevel>Level4</WarningLevel>
-      <Optimization>MaxSpeed</Optimization>
-      <FunctionLevelLinking>true</FunctionLevelLinking>
-      <IntrinsicFunctions>true</IntrinsicFunctions>
-      <PreprocessorDefinitions>WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <AdditionalUsingDirectories>%(AdditionalUsingDirectories)</AdditionalUsingDirectories>
-      <AdditionalIncludeDirectories>$(SolutionDir)\secp256k1lib;$(SolutionDir)\KeyFinderLib;$(SolutionDir)\Logger;$(SolutionDir)\Util;$(SolutionDir)\CudaMath;$(SolutionDir)\cudaUtil;$(SolutionDir)\AddressUtil;$(CUDA_INCLUDE)</AdditionalIncludeDirectories>
-    </ClCompile>
-    <Link>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-      <EnableCOMDATFolding>true</EnableCOMDATFolding>
-      <OptimizeReferences>true</OptimizeReferences>
-      <SubSystem>Console</SubSystem>
-      <AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
-    </Link>
-    <CudaCompile>
-      <TargetMachinePlatform>64</TargetMachinePlatform>
-      <GenerateRelocatableDeviceCode>true</GenerateRelocatableDeviceCode>
-      <CodeGeneration>%(CodeGeneration)</CodeGeneration>
-    </CudaCompile>
-  </ItemDefinitionGroup>
-  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
-  <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 10.1.targets" />
-  </ImportGroup>
-</Project>
\ No newline at end of file
diff --git a/CudaKeySearchDevice/Makefile b/CudaKeySearchDevice/Makefile
deleted file mode 100644
index 5782c63..0000000
--- a/CudaKeySearchDevice/Makefile
+++ /dev/null
@@ -1,22 +0,0 @@
-NAME=CudaKeySearchDevice
-CPPSRC:=$(wildcard *.cpp)
-CUSRC:=$(wildcard *.cu)
-
-all:	cuda
-
-cuda:
-	for file in ${CPPSRC} ; do\
-		${CXX} -c $$file ${INCLUDE} -I${CUDA_INCLUDE} ${CXXFLAGS};\
-	done
-
-	for file in ${CUSRC} ; do\
-		${NVCC} -c $$file -o $$file".o" ${NVCCFLAGS} -rdc=true ${INCLUDE} -I${CUDA_INCLUDE} -I${CUDA_MATH};\
-	done
-
-	${NVCC} -dlink -o cuda_libs.o *.cu.o -lcudadevrt -lcudart
-
-	ar rvs ${LIBDIR}/lib$(NAME).a *.o
-
-clean:
-	rm -f *.o *.cu.o
-	rm -f *.a
\ No newline at end of file
diff --git a/CudaKeySearchDevice/cudabridge.cu b/CudaKeySearchDevice/cudabridge.cu
deleted file mode 100644
index 33325d9..0000000
--- a/CudaKeySearchDevice/cudabridge.cu
+++ /dev/null
@@ -1,33 +0,0 @@
-#include "cudabridge.h"
-
-
-__global__ void keyFinderKernel(int points, int compression);
-__global__ void keyFinderKernelWithDouble(int points, int compression);
-
-void callKeyFinderKernel(int blocks, int threads, int points, bool useDouble, int compression)
-{
-	if(useDouble) {
-		keyFinderKernelWithDouble <<<blocks, threads >>>(points, compression);
-	} else {
-		keyFinderKernel <<<blocks, threads>>> (points, compression);
-	}
-	waitForKernel();
-}
-
-
-void waitForKernel()
-{
-    // Check for kernel launch error
-    cudaError_t err = cudaGetLastError();
-
-    if(err != cudaSuccess) {
-        throw cuda::CudaException(err);
-    }
- 
-    // Wait for kernel to complete
-    err = cudaDeviceSynchronize();
-	fflush(stdout);
-	if(err != cudaSuccess) {
-		throw cuda::CudaException(err);
-	}
-}
\ No newline at end of file
diff --git a/CudaKeySearchDevice/cudabridge.h b/CudaKeySearchDevice/cudabridge.h
deleted file mode 100644
index eaafe3a..0000000
--- a/CudaKeySearchDevice/cudabridge.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#ifndef _BRIDGE_H
-#define _BRIDGE_H
-
-#include<cuda.h>
-#include<cuda_runtime.h>
-#include<string>
-#include "cudaUtil.h"
-#include "secp256k1.h"
-
-
-void callKeyFinderKernel(int blocks, int threads, int points, bool useDouble, int compression);
-
-void waitForKernel();
-
-cudaError_t setIncrementorPoint(const secp256k1::uint256 &x, const secp256k1::uint256 &y);
-cudaError_t allocateChainBuf(unsigned int count);
-void cleanupChainBuf();
-
-#endif
\ No newline at end of file
diff --git a/KeyFinder/ConfigFile.cpp b/KeyFinder/ConfigFile.cpp
index 83098ae..4cc7fc2 100644
--- a/KeyFinder/ConfigFile.cpp
+++ b/KeyFinder/ConfigFile.cpp
@@ -43,4 +43,4 @@ std::map<std::string, ConfigFileEntry> ConfigFileReader::read()
     }
 
     return entries;
-}
\ No newline at end of file
+}
diff --git a/KeyFinder/ConfigFile.h b/KeyFinder/ConfigFile.h
index ed48116..312d21b 100644
--- a/KeyFinder/ConfigFile.h
+++ b/KeyFinder/ConfigFile.h
@@ -1,5 +1,5 @@
-#ifndef _CONFIG_FILE_H
-#define _CONFIG_FILE_H
+#ifndef CONFIG_FILE_H
+#define CONFIG_FILE_H
 
 #include <string>
 #include <map>
@@ -34,4 +34,4 @@ class ConfigFileReader {
     std::map<std::string, ConfigFileEntry> read();
 };
 
-#endif
\ No newline at end of file
+#endif
diff --git a/KeyFinder/DeviceManager.cpp b/KeyFinder/DeviceManager.cpp
index cec6c77..9e9e081 100644
--- a/KeyFinder/DeviceManager.cpp
+++ b/KeyFinder/DeviceManager.cpp
@@ -1,61 +1,27 @@
 #include "DeviceManager.h"
-
-#ifdef BUILD_CUDA
-#include "cudaUtil.h"
-#endif
-
-#ifdef BUILD_OPENCL
 #include "clutil.h"
-#endif
 
 std::vector<DeviceManager::DeviceInfo> DeviceManager::getDevices()
 {
-    int deviceId = 0;
 
     std::vector<DeviceManager::DeviceInfo> devices;
 
-#ifdef BUILD_CUDA
-    // Get CUDA devices
-    try {
-        std::vector<cuda::CudaDeviceInfo> cudaDevices = cuda::getDevices();
-
-        for(int i = 0; i < cudaDevices.size(); i++) {
-            DeviceManager::DeviceInfo device;
-            device.name = cudaDevices[i].name;
-            device.type = DeviceType::CUDA;
-            device.id = deviceId;
-            device.physicalId = cudaDevices[i].id;
-            device.memory = cudaDevices[i].mem;
-            device.computeUnits = cudaDevices[i].mpCount;
-            devices.push_back(device);
-
-            deviceId++;
-        }
-    } catch(cuda::CudaException ex) {
-        throw DeviceManager::DeviceManagerException(ex.msg);
-    }
-#endif
-
-#ifdef BUILD_OPENCL
-    // Get OpenCL devices
     try {
         std::vector<cl::CLDeviceInfo> clDevices = cl::getDevices();
 
-        for(int i = 0; i < clDevices.size(); i++) {
+        for(size_t i = 0; i < clDevices.size(); i++) {
             DeviceManager::DeviceInfo device;
             device.name = clDevices[i].name;
-            device.type = DeviceType::OpenCL;
-            device.id = deviceId;
+            device.id = i;
             device.physicalId = (uint64_t)clDevices[i].id;
             device.memory = clDevices[i].mem;
             device.computeUnits = clDevices[i].cores;
+            device.maxWorkingGroupSize = clDevices[i].maxWorkingGroupSize;
             devices.push_back(device);
-            deviceId++;
         }
     } catch(cl::CLException ex) {
         throw DeviceManager::DeviceManagerException(ex.msg);
     }
-#endif
 
     return devices;
-}
\ No newline at end of file
+}
diff --git a/KeyFinder/DeviceManager.h b/KeyFinder/DeviceManager.h
index 5f76fd4..8a367ba 100644
--- a/KeyFinder/DeviceManager.h
+++ b/KeyFinder/DeviceManager.h
@@ -1,5 +1,5 @@
-#ifndef _DEVICE_MANAGER_H
-#define _DEVICE_MANAGER_H
+#ifndef DEVICE_MANAGER_H
+#define DEVICE_MANAGER_H
 
 #include <stdint.h>
 #include <string>
@@ -18,17 +18,7 @@ class DeviceManagerException {
     }
 };
 
-class DeviceType {
-public:
-    enum {
-        CUDA = 0,
-        OpenCL
-    };
-};
-
-
 typedef struct {
-    int type;
     int id;
 
     // General device info
@@ -36,16 +26,12 @@ typedef struct {
     std::string name;
     uint64_t memory;
     int computeUnits;
+    int maxWorkingGroupSize;
 
-    // CUDA device info
-    int cudaMajor;
-    int cudaMinor;
-    int cudaCores;
 }DeviceInfo;
 
 std::vector<DeviceInfo> getDevices();
 
 }
 
-
-#endif
\ No newline at end of file
+#endif
diff --git a/KeyFinder/KeyFinder.vcxproj b/KeyFinder/KeyFinder.vcxproj
index 2a70f13..79d133d 100644
--- a/KeyFinder/KeyFinder.vcxproj
+++ b/KeyFinder/KeyFinder.vcxproj
@@ -29,26 +29,26 @@
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
+    <PlatformToolset>ClangCl</PlatformToolset>
     <CharacterSet>NotSet</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
+    <PlatformToolset>ClangCl</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>NotSet</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v142</PlatformToolset>
+    <PlatformToolset>ClangCl</PlatformToolset>
     <CharacterSet>NotSet</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v142</PlatformToolset>
+    <PlatformToolset>ClangCL</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>NotSet</CharacterSet>
   </PropertyGroup>
@@ -109,7 +109,7 @@
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <ClCompile>
       <PrecompiledHeader>NotUsing</PrecompiledHeader>
-      <WarningLevel>Level3</WarningLevel>
+      <WarningLevel>EnableAllWarnings</WarningLevel>
       <Optimization>Disabled</Optimization>
       <PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;BUILD_CUDA;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <AdditionalIncludeDirectories>$(SolutionDir)\KeyFinderLib;$(SolutionDir)\util;$(SolutionDir)\AddressUtil;$(SolutionDir)\secp256k1lib;$(SolutionDir)\CmdParse;$(SolutionDir)\cudaDeviceContext;$(CUDA_INCLUDE);$(SolutionDir)\cudaUtil;$(SolutionDir)\Logger;$(SolutionDir)\CudaKeySearchDevice</AdditionalIncludeDirectories>
diff --git a/KeyFinder/main.cpp b/KeyFinder/main.cpp
index 22c699d..8695f98 100644
--- a/KeyFinder/main.cpp
+++ b/KeyFinder/main.cpp
@@ -1,6 +1,8 @@
 #include <stdio.h>
 #include <fstream>
 #include <iostream>
+#include <string> 
+
 
 #include "KeyFinder.h"
 #include "AddressUtil.h"
@@ -9,18 +11,10 @@
 #include "CmdParse.h"
 #include "Logger.h"
 #include "ConfigFile.h"
-
 #include "DeviceManager.h"
-
-#ifdef BUILD_CUDA
-#include "CudaKeySearchDevice.h"
-#endif
-
-#ifdef BUILD_OPENCL
 #include "CLKeySearchDevice.h"
-#endif
 
-typedef struct {
+struct RunConfig{
     // startKey is the first key. We store it so that if the --continue
     // option is used, the correct progress is displayed. startKey and
     // nextKey are only equal at the very beginning. nextKey gets saved
@@ -38,7 +32,7 @@ typedef struct {
     unsigned int blocks = 0;
     unsigned int pointsPerThread = 0;
     
-    int compression = PointCompressionType::COMPRESSED;
+    int compressionMode = PointCompressionType::COMPRESSED;
  
     std::vector<std::string> targets;
 
@@ -55,7 +49,7 @@ typedef struct {
     secp256k1::uint256 stride = 1;
 
     bool follow = false;
-}RunConfig;
+};
 
 static RunConfig _config;
 
@@ -64,7 +58,6 @@ std::vector<DeviceManager::DeviceInfo> _devices;
 void writeCheckpoint(secp256k1::uint256 nextKey);
 
 static uint64_t _lastUpdate = 0;
-static uint64_t _runningTime = 0;
 static uint64_t _startTime = 0;
 
 /**
@@ -75,32 +68,18 @@ void resultCallback(KeySearchResult info)
 	if(_config.resultsFile.length() != 0) {
 		Logger::log(LogLevel::Info, "Found key for address '" + info.address + "'. Written to '" + _config.resultsFile + "'");
 
-		std::string s = info.address + " " + info.privateKey.toString(16) + " " + info.publicKey.toString(info.compressed);
+		std::string s = info.address + " " + info.privateKey.toString() + " " + info.publicKey.toString(info.compressed);
 		util::appendToFile(_config.resultsFile, s);
 
 		return;
 	}
 
-	std::string logStr = "Address:     " + info.address + "\n";
-	logStr += "Private key: " + info.privateKey.toString(16) + "\n";
-	logStr += "Compressed:  ";
+	std::string logStr = "\n\nAddress:     " + info.address + "\n";
+	logStr +=              "Private key: " + info.privateKey.toString() + "\n";
+    logStr +=              "Compressed:  "; logStr += (info.compressed) ? "yes\n" : "no\n";
+    logStr +=              "Public key:  "; logStr += (info.compressed) ? info.publicKey.toString(true) + "\n" : info.publicKey.x.toString() + "\n            " + info.publicKey.y.toString() + "\n";
 
-	if(info.compressed) {
-		logStr += "yes\n";
-	} else {
-		logStr += "no\n";
-	}
-
-	logStr += "Public key:  \n";
-
-	if(info.compressed) {
-		logStr += info.publicKey.toString(true) + "\n";
-	} else {
-		logStr += info.publicKey.x.toString(16) + "\n";
-		logStr += info.publicKey.y.toString(16) + "\n";
-	}
-
-	Logger::log(LogLevel::Info, logStr);
+	Logger::log(LogLevel::Notify, logStr);
 }
 
 /**
@@ -120,26 +99,18 @@ void statusCallback(KeySearchStatus info)
 
 	std::string timeStr = "[" + util::formatSeconds((unsigned int)((_config.elapsed + info.totalTime) / 1000)) + "]";
 
-	std::string usedMemStr = util::format((info.deviceMemory - info.freeMemory) /(1024 * 1024));
-
-	std::string totalMemStr = util::format(info.deviceMemory / (1024 * 1024));
-
     std::string targetStr = util::format(info.targets) + " target" + (info.targets > 1 ? "s" : "");
 
-
 	// Fit device name in 16 characters, pad with spaces if less
-	std::string devName = info.deviceName.substr(0, 16);
-	devName += std::string(16 - devName.length(), ' ');
-
     const char *formatStr = NULL;
 
     if(_config.follow) {
-        formatStr = "%s %s/%sMB | %s %s %s %s\n";
+        formatStr = "%s %s %s %s\n";
     } else {
-        formatStr = "\r%s %s / %sMB | %s %s %s %s";
+        formatStr = "\r%s %s %s %s";
     }
 
-	printf(formatStr, devName.c_str(), usedMemStr.c_str(), totalMemStr.c_str(), targetStr.c_str(), speedStr.c_str(), totalStr.c_str(), timeStr.c_str());
+	printf(formatStr, targetStr.c_str(), speedStr.c_str(), totalStr.c_str(), timeStr.c_str());
 
     if(_config.checkpointFile.length() > 0) {
         uint64_t t = util::getSystemTime();
@@ -192,29 +163,29 @@ void usage()
     printf("BitCrack OPTIONS [TARGETS]\n");
     printf("Where TARGETS is one or more addresses\n\n");
 	
-    printf("--help                  Display this message\n");
-    printf("-c, --compressed        Use compressed points\n");
-    printf("-u, --uncompressed      Use Uncompressed points\n");
-    printf("--compression  MODE     Specify compression where MODE is\n");
-    printf("                          COMPRESSED or UNCOMPRESSED or BOTH\n");
-    printf("-d, --device ID         Use device ID\n");
-    printf("-b, --blocks N          N blocks\n");
-    printf("-t, --threads N         N threads per block\n");
-    printf("-p, --points N          N points per thread\n");
-    printf("-i, --in FILE           Read addresses from FILE, one per line\n");
-    printf("-o, --out FILE          Write keys to FILE\n");
-    printf("-f, --follow            Follow text output\n");
-    printf("--list-devices          List available devices\n");
-    printf("--keyspace KEYSPACE     Specify the keyspace:\n");
-    printf("                          START:END\n");
-    printf("                          START:+COUNT\n");
-    printf("                          START\n");
-    printf("                          :END\n"); 
-    printf("                          :+COUNT\n");
-    printf("                        Where START, END, COUNT are in hex format\n");
-    printf("--stride N              Increment by N keys at a time\n");
-    printf("--share M/N             Divide the keyspace into N equal shares, process the Mth share\n");
-    printf("--continue FILE         Save/load progress from FILE\n");
+    printf("-?, -h, --help            Display this message\n");
+    printf("-c, --compressed          Use compressed points\n");
+    printf("-u, --uncompressed        Use Uncompressed points\n");
+    printf("--compression  MODE       Specify compression where MODE is\n");
+    printf("                            COMPRESSED or UNCOMPRESSED or BOTH\n");
+    printf("-d, --device ID           Use device ID\n");
+    printf("-b, --blocks N            N blocks\n");
+    printf("-t, --threads N           N threads per block\n");
+    printf("-p, --points N            N points per thread\n");
+    printf("-i, --in FILE             Read addresses from FILE, one per line\n");
+    printf("-o, --out FILE            Write keys to FILE\n");
+    printf("-f, --follow              Follow text output\n");
+    printf("--list-devices            List available devices\n");
+    printf("-k, --keyspace KEYSPACE   Specify the keyspace:\n");
+    printf("                            START:END\n");
+    printf("                            START:+COUNT\n");
+    printf("                            START\n");
+    printf("                            :END\n"); 
+    printf("                            :+COUNT\n");
+    printf("                          Where START, END, COUNT are in hex format\n");
+    printf("--stride N                Increment by N keys at a time\n");
+    printf("--share M/N               Divide the keyspace into N equal shares, process the Mth share\n");
+    printf("--continue FILE           Save/load progress from FILE\n");
 }
 
 
@@ -222,36 +193,26 @@ void usage()
  Finds default parameters depending on the device
  */
 typedef struct {
-	int threads;
-	int blocks;
-	int pointsPerThread;
+	unsigned int threads;
+	unsigned int blocks;
+	unsigned int pointsPerThread;
+    unsigned int compressionMode;
 }DeviceParameters;
 
 DeviceParameters getDefaultParameters(const DeviceManager::DeviceInfo &device)
 {
-	DeviceParameters p;
-	p.threads = 256;
-    p.blocks = 32;
-	p.pointsPerThread = 32;
+	DeviceParameters parameters;
+	parameters.threads = 256;
+    parameters.blocks = 32;
+	parameters.pointsPerThread = 32;
+    parameters.compressionMode = PointCompressionType::COMPRESSED;
 
-	return p;
+	return parameters;
 }
 
-static KeySearchDevice *getDeviceContext(DeviceManager::DeviceInfo &device, int blocks, int threads, int pointsPerThread)
+static KeySearchDevice *getDeviceContext(DeviceManager::DeviceInfo &device, int blocks, int threads, int pointsPerThread, int compressionMode)
 {
-#ifdef BUILD_CUDA
-    if(device.type == DeviceManager::DeviceType::CUDA) {
-        return new CudaKeySearchDevice((int)device.physicalId, threads, pointsPerThread, blocks);
-    }
-#endif
-
-#ifdef BUILD_OPENCL
-    if(device.type == DeviceManager::DeviceType::OpenCL) {
-        return new CLKeySearchDevice(device.physicalId, threads, pointsPerThread, blocks);
-    }
-#endif
-
-    return NULL;
+     return new CLKeySearchDevice(device.physicalId, threads, pointsPerThread, blocks, compressionMode);
 }
 
 static void printDeviceList(const std::vector<DeviceManager::DeviceInfo> &devices)
@@ -274,7 +235,7 @@ bool readAddressesFromFile(const std::string &fileName, std::vector<std::string>
     }
 }
 
-int parseCompressionString(const std::string &s)
+PointCompressionType::Value parseCompressionString(const std::string &s)
 {
     std::string comp = util::toLower(s);
 
@@ -302,26 +263,26 @@ static std::string getCompressionString(int mode)
         return "uncompressed";
     case PointCompressionType::COMPRESSED:
         return "compressed";
+    default: 
+        throw std::string("Invalid compression setting '" + util::format(mode) + "'");
     }
-
-    throw std::string("Invalid compression setting '" + util::format(mode) + "'");
 }
 
 void writeCheckpoint(secp256k1::uint256 nextKey)
 {
-    std::ofstream tmp(_config.checkpointFile, std::ios::out);
-
-    tmp << "start=" << _config.startKey.toString() << std::endl;
-    tmp << "next=" << nextKey.toString() << std::endl;
-    tmp << "end=" << _config.endKey.toString() << std::endl;
-    tmp << "blocks=" << _config.blocks << std::endl;
-    tmp << "threads=" << _config.threads << std::endl;
-    tmp << "points=" << _config.pointsPerThread << std::endl;
-    tmp << "compression=" << getCompressionString(_config.compression) << std::endl;
-    tmp << "device=" << _config.device << std::endl;
-    tmp << "elapsed=" << (_config.elapsed + util::getSystemTime() - _startTime) << std::endl;
-    tmp << "stride=" << _config.stride.toString();
-    tmp.close();
+    std::ofstream fileStream(_config.checkpointFile, std::ios::out);
+
+    fileStream << "start=" << _config.startKey.toString() << "\n";
+    fileStream << "next=" << nextKey.toString() << "\n";
+    fileStream << "end=" << _config.endKey.toString() << "\n";
+    fileStream << "blocks=" << _config.blocks << "\n";
+    fileStream << "threads=" << _config.threads << "\n";
+    fileStream << "points=" << _config.pointsPerThread << "\n";
+    fileStream << "compression=" << getCompressionString(_config.compressionMode) << "\n";
+    fileStream << "device=" << _config.device << "\n";
+    fileStream << "elapsed=" << (_config.elapsed + util::getSystemTime() - _startTime) << "\n";
+    fileStream << "stride=" << _config.stride.toString();
+    fileStream.close();
 }
 
 void readCheckpointFile()
@@ -354,7 +315,7 @@ void readCheckpointFile()
         _config.pointsPerThread = util::parseUInt32(entries["points"].value);
     }
     if(entries.find("compression") != entries.end()) {
-        _config.compression = parseCompressionString(entries["compression"].value);
+        _config.compressionMode = parseCompressionString(entries["compression"].value);
     }
     if(entries.find("elapsed") != entries.end()) {
         _config.elapsed = util::parseUInt32(entries["elapsed"].value);
@@ -368,15 +329,17 @@ void readCheckpointFile()
 
 int run()
 {
+    Logger::log(LogLevel::Info, "BitCrackOpenCL\n");
+
     if(_config.device < 0 || _config.device >= _devices.size()) {
         Logger::log(LogLevel::Error, "device " + util::format(_config.device) + " does not exist");
         return 1;
     }
 
-    Logger::log(LogLevel::Info, "Compression: " + getCompressionString(_config.compression));
+    Logger::log(LogLevel::Info, "Compression: " + getCompressionString(_config.compressionMode));
     Logger::log(LogLevel::Info, "Starting at: " + _config.nextKey.toString());
     Logger::log(LogLevel::Info, "Ending at:   " + _config.endKey.toString());
-    Logger::log(LogLevel::Info, "Counting by: " + _config.stride.toString());
+    Logger::log(LogLevel::Info, "Counting by: " + _config.stride.toString() + "\n");
 
     try {
 
@@ -398,28 +361,32 @@ int run()
             _config.pointsPerThread = params.pointsPerThread;
         }
 
+        Logger::log(LogLevel::Info, "Threads: " + std::to_string(_config.threads));
+        Logger::log(LogLevel::Info, "Blocks: " + std::to_string(_config.blocks));
+        Logger::log(LogLevel::Info, "Points per Thread: " + std::to_string(_config.pointsPerThread));
+            
         // Get device context
-        KeySearchDevice *d = getDeviceContext(_devices[_config.device], _config.blocks, _config.threads, _config.pointsPerThread);
+        KeySearchDevice *keySearchDevice = getDeviceContext(_devices[_config.device], _config.blocks, _config.threads, _config.pointsPerThread, _config.compressionMode);
 
-        KeyFinder f(_config.nextKey, _config.endKey, _config.compression, d, _config.stride);
+        KeyFinder keyFinder(_config.nextKey, _config.endKey, _config.compressionMode, keySearchDevice, _config.stride);
 
-        f.setResultCallback(resultCallback);
-        f.setStatusInterval(_config.statusInterval);
-        f.setStatusCallback(statusCallback);
+        keyFinder.setResultCallback(resultCallback);
+        keyFinder.setStatusInterval(_config.statusInterval);
+        keyFinder.setStatusCallback(statusCallback);
 
-        f.init();
+        keyFinder.init();
 
         if(!_config.targetsFile.empty()) {
-            f.setTargets(_config.targetsFile);
+            keyFinder.setTargets(_config.targetsFile);
         } else {
-            f.setTargets(_config.targets);
+            keyFinder.setTargets(_config.targets);
         }
 
-        f.run();
+        keyFinder.run();
 
-        delete d;
+        delete keySearchDevice;
     } catch(KeySearchException ex) {
-        Logger::log(LogLevel::Info, "Error: " + ex.msg);
+        Logger::log(LogLevel::Info, "Error: " + ex.msg + ": " + ex.description);
         return 1;
     }
 
@@ -465,16 +432,16 @@ int main(int argc, char **argv)
 	bool optUncompressed = false;
     bool listDevices = false;
     bool optShares = false;
-    bool optThreads = false;
-    bool optBlocks = false;
-    bool optPoints = false;
 
     uint32_t shareIdx = 0;
     uint32_t numShares = 0;
 
-    // Catch --help first
     for(int i = 1; i < argc; i++) {
-        if(std::string(argv[i]) == "--help") {
+        if(
+            std::string(argv[i]) == "--help" ||
+            std::string(argv[i]) == "-h" ||
+            std::string(argv[i]) == "-?"
+        ) {
             usage();
             return 0;
         }
@@ -499,7 +466,6 @@ int main(int argc, char **argv)
 		return 0;
 	}
 
-
 	CmdParse parser;
 	parser.add("-d", "--device", true);
 	parser.add("-t", "--threads", true);
@@ -513,7 +479,7 @@ int main(int argc, char **argv)
 	parser.add("-o", "--out", true);
     parser.add("-f", "--follow", false);
     parser.add("", "--list-devices", false);
-    parser.add("", "--keyspace", true);
+    parser.add("-k", "--keyspace", true);
     parser.add("", "--continue", true);
     parser.add("", "--share", true);
     parser.add("", "--stride", true);
@@ -534,13 +500,10 @@ int main(int argc, char **argv)
 		try {
 			if(optArg.equals("-t", "--threads")) {
 				_config.threads = util::parseUInt32(optArg.arg);
-                optThreads = true;
             } else if(optArg.equals("-b", "--blocks")) {
                 _config.blocks = util::parseUInt32(optArg.arg);
-                optBlocks = true;
 			} else if(optArg.equals("-p", "--points")) {
 				_config.pointsPerThread = util::parseUInt32(optArg.arg);
-                optPoints = true;
 			} else if(optArg.equals("-d", "--device")) {
 				_config.device = util::parseUInt32(optArg.arg);
 			} else if(optArg.equals("-c", "--compressed")) {
@@ -548,7 +511,7 @@ int main(int argc, char **argv)
             } else if(optArg.equals("-u", "--uncompressed")) {
                 optUncompressed = true;
             } else if(optArg.equals("", "--compression")) {
-                _config.compression = parseCompressionString(optArg.arg);
+                _config.compressionMode = parseCompressionString(optArg.arg);
 			} else if(optArg.equals("-i", "--in")) {
 				_config.targetsFile = optArg.arg;
 			} else if(optArg.equals("-o", "--out")) {
@@ -557,7 +520,7 @@ int main(int argc, char **argv)
                 listDevices = true;
             } else if(optArg.equals("", "--continue")) {
                 _config.checkpointFile = optArg.arg;
-            } else if(optArg.equals("", "--keyspace")) {
+            } else if(optArg.equals("-k", "--keyspace")) {
                 secp256k1::uint256 start;
                 secp256k1::uint256 end;
 
@@ -665,16 +628,16 @@ int main(int argc, char **argv)
 
 	// Check option for compressed, uncompressed, or both
 	if(optCompressed && optUncompressed) {
-		_config.compression = PointCompressionType::BOTH;
+		_config.compressionMode = PointCompressionType::BOTH;
 	} else if(optCompressed) {
-		_config.compression = PointCompressionType::COMPRESSED;
+		_config.compressionMode = PointCompressionType::COMPRESSED;
 	} else if(optUncompressed) {
-		_config.compression = PointCompressionType::UNCOMPRESSED;
+		_config.compressionMode = PointCompressionType::UNCOMPRESSED;
 	}
 
-    if(_config.checkpointFile.length() > 0) {
+    if(_config.checkpointFile.length() != 0) {
         readCheckpointFile();
     }
 
     return run();
-}
\ No newline at end of file
+}
diff --git a/KeyFinderLib/KeyFinder.cpp b/KeyFinderLib/KeyFinder.cpp
index 19f56cb..0f14280 100644
--- a/KeyFinderLib/KeyFinder.cpp
+++ b/KeyFinderLib/KeyFinder.cpp
@@ -7,17 +7,6 @@
 
 #include "Logger.h"
 
-
-void KeyFinder::defaultResultCallback(KeySearchResult result)
-{
-	// Do nothing
-}
-
-void KeyFinder::defaultStatusCallback(KeySearchStatus status)
-{
-	// Do nothing
-}
-
 KeyFinder::KeyFinder(const secp256k1::uint256 &startKey, const secp256k1::uint256 &endKey, int compression, KeySearchDevice* device, const secp256k1::uint256 &stride)
 {
 	_total = 0;
@@ -46,7 +35,7 @@ KeyFinder::~KeyFinder()
 void KeyFinder::setTargets(std::vector<std::string> &targets)
 {
 	if(targets.size() == 0) {
-		throw KeySearchException("Requires at least 1 target");
+		throw KeySearchException("KEYSEARCH_NO_TARGET", "Requires at least 1 target");
 	}
 
 	_targets.clear();
@@ -55,7 +44,7 @@ void KeyFinder::setTargets(std::vector<std::string> &targets)
 	for(unsigned int i = 0; i < targets.size(); i++) {
 
 		if(!Address::verifyAddress(targets[i])) {
-			throw KeySearchException("Invalid address '" + targets[i] + "'");
+			throw KeySearchException("KEYSEARCH_INVALID_ADDRESS", "Invalid address '" + targets[i] + "'");
 		}
 
 		KeySearchTarget t;
@@ -71,10 +60,11 @@ void KeyFinder::setTargets(std::vector<std::string> &targets)
 void KeyFinder::setTargets(std::string targetsFile)
 {
 	std::ifstream inFile(targetsFile.c_str());
+	unsigned int invalidAddressCount = 0;
 
 	if(!inFile.is_open()) {
 		Logger::log(LogLevel::Error, "Unable to open '" + targetsFile + "'");
-		throw KeySearchException();
+		throw KeySearchException("FILE", "Unable to open '" + targetsFile + "'");
 	}
 
 	_targets.clear();
@@ -85,10 +75,10 @@ void KeyFinder::setTargets(std::string targetsFile)
 		util::removeNewline(line);
         line = util::trim(line);
 
-		if(line.length() > 0) {
+		if(line.length() != 0) {
 			if(!Address::verifyAddress(line)) {
-				Logger::log(LogLevel::Error, "Invalid address '" + line + "'");
-				throw KeySearchException();
+				invalidAddressCount++;
+				continue;
 			}
 
 			KeySearchTarget t;
@@ -98,8 +88,9 @@ void KeyFinder::setTargets(std::string targetsFile)
 			_targets.insert(t);
 		}
 	}
-	Logger::log(LogLevel::Info, util::formatThousands(_targets.size()) + " addresses loaded ("
-		+ util::format("%.1f", (double)(sizeof(KeySearchTarget) * _targets.size()) / (double)(1024 * 1024)) + "MB)");
+	Logger::log(LogLevel::Info, util::formatThousands(_targets.size()) + " address(es) loaded ("
+		+ util::format("%.1f", (double)(sizeof(KeySearchTarget) * _targets.size()) / (double)(1024 * 1024)) + "MB)"
+		+ "\n" + util::formatThousands(invalidAddressCount) + " address(es) ignored");
 
     _device->setTargets(_targets);
 }
@@ -197,15 +188,6 @@ void KeyFinder::run()
 
 			info.totalTime = _totalTime;
 
-			uint64_t freeMem = 0;
-
-			uint64_t totalMem = 0;
-
-			_device->getMemoryInfo(freeMem, totalMem);
-
-			info.freeMemory = freeMem;
-			info.deviceMemory = totalMem;
-			info.deviceName = _device->getDeviceName();
 			info.targets = _targets.size();
             info.nextKey = getNextKey();
 
diff --git a/KeyFinderLib/KeyFinder.h b/KeyFinderLib/KeyFinder.h
index c8150f7..1bc67cd 100644
--- a/KeyFinderLib/KeyFinder.h
+++ b/KeyFinderLib/KeyFinder.h
@@ -1,5 +1,5 @@
-#ifndef _KEY_FINDER_H
-#define _KEY_FINDER_H
+#ifndef KEY_FINDER_H
+#define KEY_FINDER_H
 
 #include <stdint.h>
 #include <vector>
diff --git a/KeyFinderLib/KeyFinderLib.vcxproj b/KeyFinderLib/KeyFinderLib.vcxproj
index dd02253..6eb479a 100644
--- a/KeyFinderLib/KeyFinderLib.vcxproj
+++ b/KeyFinderLib/KeyFinderLib.vcxproj
@@ -5,6 +5,10 @@
       <Configuration>Debug</Configuration>
       <Platform>x64</Platform>
     </ProjectConfiguration>
+    <ProjectConfiguration Include="Performance Release|x64">
+      <Configuration>Performance Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
     <ProjectConfiguration Include="Release|x64">
       <Configuration>Release</Configuration>
       <Platform>x64</Platform>
@@ -20,14 +24,22 @@
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
     <CharacterSet>MultiByte</CharacterSet>
-    <PlatformToolset>v142</PlatformToolset>
+    <PlatformToolset>ClangCl</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>ClangCl</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PreferredToolArchitecture>x64</PreferredToolArchitecture>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>MultiByte</CharacterSet>
-    <PlatformToolset>v142</PlatformToolset>
+    <PlatformToolset>ClangCL</PlatformToolset>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
@@ -36,6 +48,10 @@
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
     <Import Project="..\BitCrack.props" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="..\BitCrack.props" />
+  </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
     <Import Project="..\BitCrack.props" />
@@ -44,6 +60,9 @@
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <LinkIncremental>true</LinkIncremental>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <ClCompile>
       <WarningLevel>Level4</WarningLevel>
@@ -57,6 +76,26 @@
       <AdditionalDependencies>kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
     </Link>
   </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PreprocessorDefinitions>WIN32;WIN64;_CRT_SECURE_NO_WARNINGS;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>$(SolutionDir)secp256k1lib;$(SolutionDir)AddressUtil;$(SolutionDir)Logger;$(SolutionDir)util;$(SolutionDir)KeySearchDevice;$(SolutionDir)clUtil;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <DebugInformationFormat>None</DebugInformationFormat>
+      <Optimization>MaxSpeed</Optimization>
+      <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <AdditionalDependencies>kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <ClCompile>
       <WarningLevel>Level4</WarningLevel>
diff --git a/KeyFinderLib/KeyFinderShared.h b/KeyFinderLib/KeyFinderShared.h
index a6973f1..049d426 100644
--- a/KeyFinderLib/KeyFinderShared.h
+++ b/KeyFinderLib/KeyFinderShared.h
@@ -1,5 +1,5 @@
-#ifndef _KEY_FINDER_SHARED_H
-#define _KEY_FINDER_SHARED_H
+#ifndef KEY_FINDER_SHARED_H
+#define KEY_FINDER_SHARED_H
 
 namespace PointCompressionType {
 	enum Value {
@@ -20,14 +20,4 @@ struct KeyFinderDeviceResult {
 	unsigned int digest[5];
 };
 
-//typedef struct hash160 {
-//
-//	unsigned int h[5];
-//
-//	hash160(const unsigned int hash[5])
-//	{
-//		memcpy(h, hash, sizeof(unsigned int) * 5);
-//	}
-//}hash160;
-
-#endif
\ No newline at end of file
+#endif
diff --git a/KeyFinderLib/KeySearchDevice.h b/KeyFinderLib/KeySearchDevice.h
index 4139dc9..26621d2 100644
--- a/KeyFinderLib/KeySearchDevice.h
+++ b/KeyFinderLib/KeySearchDevice.h
@@ -1,5 +1,5 @@
-#ifndef _KEY_SEARCH_DEVICE_H
-#define _KEY_SEARCH_DEVICE_H
+#ifndef KEY_SEARCH_DEVICE_H
+#define KEY_SEARCH_DEVICE_H
 
 #include <vector>
 #include <set>
@@ -16,12 +16,14 @@ class KeySearchException {
 
     }
 
-    KeySearchException(const std::string &msg)
+    KeySearchException(const std::string &msg, const std::string &description) // msg: short error tag, description: human-readable detail
     {
         this->msg = msg;
+        this->description = description;
     }
 
     std::string msg;
+    std::string description;
 };
 
 
@@ -38,6 +40,8 @@ class KeySearchDevice {
 
 public:
 
+    virtual ~KeySearchDevice() {} // virtual: devices are deleted through a KeySearchDevice* by the caller
+
     // Initialize the device
     virtual void init(const secp256k1::uint256 &start, int compression, const secp256k1::uint256 &stride) = 0;
 
@@ -62,4 +66,4 @@ class KeySearchDevice {
     virtual secp256k1::uint256 getNextKey() = 0;
 };
 
-#endif
\ No newline at end of file
+#endif
diff --git a/KeyFinderLib/KeySearchTypes.h b/KeyFinderLib/KeySearchTypes.h
index 9ff12d3..cfe8a89 100644
--- a/KeyFinderLib/KeySearchTypes.h
+++ b/KeyFinderLib/KeySearchTypes.h
@@ -1,8 +1,8 @@
-#ifndef _KEY_FINDER_TYPES
-#define _KEY_FINDER_TYPES
+#ifndef KEY_FINDER_TYPES
+#define KEY_FINDER_TYPES
 
-#include<stdint.h>
-#include<string>
+#include <stdint.h>
+#include <string>
 #include "secp256k1.h"
 
 namespace PointCompressionType {
@@ -29,9 +29,6 @@ typedef struct {
     double speed;
     uint64_t total;
     uint64_t totalTime;
-    std::string deviceName;
-    uint64_t freeMemory;
-    uint64_t deviceMemory;
     uint64_t targets;
     secp256k1::uint256 nextKey;
 }KeySearchStatus;
diff --git a/Logger/Logger.cpp b/Logger/Logger.cpp
index 8e910ba..59af724 100644
--- a/Logger/Logger.cpp
+++ b/Logger/Logger.cpp
@@ -4,12 +4,29 @@
 #include "Logger.h"
 #include "util.h"
 
+inline tm localtime_xp(time_t timer) // converts timer to local time, using the re-entrant API where one exists
+{
+	tm bt{}; // value-initialised so a failed conversion never returns indeterminate fields
+#if defined(__unix__) || defined(__APPLE__) // macOS provides localtime_r but does not define __unix__
+	localtime_r(&timer, &bt);
+#elif defined(_MSC_VER)
+	localtime_s(&bt, &timer);
+#else
+	static std::mutex mtx; // std::localtime uses shared static storage; serialise access to it
+	std::lock_guard<std::mutex> lock(mtx);
+	if(const tm *shared = std::localtime(&timer)) bt = *shared; // std::localtime may return null
+#endif
+	return bt;
+}
+
 bool LogLevel::isValid(int level)
 {
 	switch(level) {
 		case Info:
 		case Error:
 		case Debug:
+		case Warning:
+		case Notify:
 			return true;
 		default:
 			return false;
@@ -27,9 +44,11 @@ std::string LogLevel::toString(int level)
 			return "Debug";
         case Warning:
             return "Warning";
+		case Notify:
+			return "Notify";
+		default:
+			return "";
 	}
-
-	return "";
 }
 
 std::string Logger::getDateTimeString()
@@ -37,22 +56,20 @@ std::string Logger::getDateTimeString()
 	time_t     now = time(0);
 	struct tm  tstruct;
 	char       buf[80];
-	tstruct = *localtime(&now);
+	tstruct = localtime_xp(now);
 
 	strftime(buf, sizeof(buf), "%Y-%m-%d.%X", &tstruct);
 
 	return std::string(buf);
 }
 
-std::string Logger::formatLog(int logLevel, std::string msg)
+std::string Logger::formatLog(LogLevel::Level logLevel, std::string msg)
 {
 	std::string dateTime = getDateTimeString();
 
 	std::string prefix = "[" + dateTime + "] [" + LogLevel::toString(logLevel) + "] ";
 
-	size_t prefixLen = prefix.length();
-
-	std::string padding(prefixLen, ' ');
+	std::string padding(prefix.length(), ' ');
 
 	if(msg.find('\n', 0) != std::string::npos) {
  		size_t pos = 0;
@@ -71,15 +88,11 @@ std::string Logger::formatLog(int logLevel, std::string msg)
 	return prefix;
 }
 
-
-void Logger::log(int logLevel, std::string msg)
+void Logger::log(LogLevel::Level level, std::string msg) // formats msg for the given level and writes it to stderr
 {
-	std::string str = formatLog(logLevel, msg);
-
+	std::string str = formatLog(level, msg);
+	if (level == LogLevel::Level::Notify) { // Notify entries emit the alert character before the message
+		fprintf(stderr, "\a");
+	}
 	fprintf(stderr, "%s\n", str.c_str());
 }
-
-void Logger::setLogFile(std::string path)
-{
-
-}
diff --git a/Logger/Logger.h b/Logger/Logger.h
index 40cf73b..3422119 100644
--- a/Logger/Logger.h
+++ b/Logger/Logger.h
@@ -1,29 +1,28 @@
-#ifndef _LOGGER_H
-#define _LOGGER_H
+#ifndef LOGGER_H
+#define LOGGER_H
 
 #include <string>
 
-
 namespace LogLevel {
 	enum Level {
 		Info = 1,
 		Error = 2,
 		Debug = 4,
-        Warning = 8
+        Warning = 8,
+		Notify = 16
 	};
 
 	bool isValid(int level);
 
 	std::string toString(int level);
-};
-
+}
 
 class Logger {
 
 private:
 	static std::string _logFile;
 
-	static std::string formatLog(int logLevel, std::string msg);
+	static std::string formatLog(LogLevel::Level logLevel, std::string msg);
 
 	static std::string getDateTimeString();
 
@@ -33,9 +32,8 @@ class Logger {
 	{
 	}
 
-	static void log(int logLevel, std::string msg);
+	static void log(LogLevel::Level level, std::string msg);
 
-	static void setLogFile(std::string path);
 };
 
-#endif
\ No newline at end of file
+#endif
diff --git a/Logger/Logger.vcxproj b/Logger/Logger.vcxproj
index 8e1d7c3..1b11df7 100644
--- a/Logger/Logger.vcxproj
+++ b/Logger/Logger.vcxproj
@@ -5,6 +5,14 @@
       <Configuration>Debug</Configuration>
       <Platform>Win32</Platform>
     </ProjectConfiguration>
+    <ProjectConfiguration Include="Performance Release|Win32">
+      <Configuration>Performance Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Performance Release|x64">
+      <Configuration>Performance Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
     <ProjectConfiguration Include="Release|Win32">
       <Configuration>Release</Configuration>
       <Platform>Win32</Platform>
@@ -33,32 +41,46 @@
     <ProjectGuid>{150AF404-1F80-4A13-855B-4383C4A3326F}</ProjectGuid>
     <Keyword>Win32Proj</Keyword>
     <RootNamespace>Logger</RootNamespace>
-    <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
+    <WindowsTargetPlatformVersion>10.0.19041.0</WindowsTargetPlatformVersion>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
+    <PlatformToolset>ClangCl</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>ClangCl</PlatformToolset>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
+    <PlatformToolset>ClangCl</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v142</PlatformToolset>
+    <PlatformToolset>ClangCl</PlatformToolset>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>ClangCl</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PreferredToolArchitecture>x64</PreferredToolArchitecture>
+  </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v142</PlatformToolset>
+    <PlatformToolset>ClangCL</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
@@ -71,6 +93,10 @@
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
     <Import Project="..\BitCrack.props" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="..\BitCrack.props" />
+  </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
     <Import Project="..\BitCrack.props" />
@@ -79,6 +105,10 @@
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
     <Import Project="..\BitCrack.props" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="..\BitCrack.props" />
+  </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
     <Import Project="..\BitCrack.props" />
@@ -98,10 +128,23 @@
       <SubSystem>Windows</SubSystem>
     </Link>
   </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|Win32'">
+    <ClCompile> <!-- optimised NDEBUG build, matching UseDebugLibraries=false for this configuration -->
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>MaxSpeed</Optimization>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>$(SolutionDir)Util;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <ClCompile>
       <PrecompiledHeader>NotUsing</PrecompiledHeader>
-      <WarningLevel>Level3</WarningLevel>
+      <WarningLevel>EnableAllWarnings</WarningLevel>
       <Optimization>Disabled</Optimization>
       <PreprocessorDefinitions>_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <AdditionalIncludeDirectories>$(SolutionDir)Util;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
@@ -110,6 +153,25 @@
       <SubSystem>Windows</SubSystem>
     </Link>
   </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|x64'">
+    <ClCompile>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>MaxSpeed</Optimization>
+      <PreprocessorDefinitions>NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>$(SolutionDir)Util;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <DebugInformationFormat>None</DebugInformationFormat>
+      <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <ClCompile>
       <WarningLevel>Level3</WarningLevel>
diff --git a/README.md b/README.md
index 73bebe1..c8f5206 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# BitCrack
+# BitCrackOpenCL
 
 A tool for brute-forcing Bitcoin private keys. The main purpose of this project is to contribute to the effort of solving the [Bitcoin puzzle transaction](https://blockchain.info/tx/08389f34c98c606322740c0be6a7125d9860bb8d5cb182c02f98461e5fa6cd15): A transaction with 32 addresses that become increasingly difficult to crack.
 
@@ -135,7 +135,7 @@ kernel to run longer, but more keys will be processed.
 
 Visual Studio 2019 (if on Windows)
 
-For CUDA: CUDA Toolkit 10.1
+For CUDA: CUDA Toolkit 11.3
 
 For OpenCL: An OpenCL SDK (The CUDA toolkit contains an OpenCL SDK).
 
diff --git a/addresses.txt b/addresses.txt
new file mode 100644
index 0000000..71c2032
--- /dev/null
+++ b/addresses.txt
@@ -0,0 +1,160 @@
+1BgGZ9tcN4rm9KBzDn7KprQz87SZ26SAMH
+1CUNEBjYrCn2y1SdiUMohaKUi4wpP326Lb
+19ZewH8Kk1PDbSNdJ97FP4EiCjTRaZMZQA
+1EhqbyUMvvs7BfL8goY6qcPbD6YKfPqb7e
+1E6NuFjCi27W5zoXg8TRdcSRq84zJeBW3k
+1PitScNLyp2HCygzadCh7FveTnfmpPbfp8
+1McVt1vMtCC7yn5b9wgX1833yCcLXzueeC
+1M92tSqNmQLYw33fuBvjmeadirh1ysMBxK
+1CQFwcjw1dwhtkVWBttNLDtqL7ivBonGPV
+1LeBZP5QCwwgXRtmVUvTVrraqPUokyLHqe
+1PgQVLmst3Z314JrQn5TNiys8Hc38TcXJu
+1DBaumZxUkM4qMQRt2LVWyFJq5kDtSZQot
+1Pie8JkxBT6MGPz9Nvi3fsPkr2D8q3GBc1
+1ErZWg5cFCe4Vw5BzgfzB74VNLaXEiEkhk
+1QCbW9HWnwQWiQqVo5exhAnmfqKRrCRsvW
+1BDyrQ6WoF8VN3g9SAS1iKZcPzFfnDVieY
+1HduPEXZRdG26SUT5Yk83mLkPyjnZuJ7Bm
+1GnNTmTVLZiqQfLbAdp9DVdicEnB5GoERE
+1NWmZRpHH4XSPwsW6dsS3nrNWfL1yrJj4w
+1HsMJxNiV7TLxmoF6uJNkydxPFDog4NQum
+14oFNXucftsHiUMY8uctg6N487riuyXs4h
+1CfZWK1QTQE3eS9qn61dQjV89KDjZzfNcv
+1L2GM8eE7mJWLdo3HZS6su1832NX2txaac
+1rSnXMr63jdCuegJFuidJqWxUPV7AtUf7
+15JhYXn6Mx3oF4Y7PcTAv2wVVAuCFFQNiP
+1JVnST957hGztonaWK6FougdtjxzHzRMMg
+128z5d7nN7PkCuX5qoA4Ys6pmxUYnEy86k
+12jbtzBb54r97TCwW3G1gCFoumpckRAPdY
+19EEC52krRUK1RkUAEZmQdjTyHT7Gp1TYT
+1LHtnpd8nU5VHEMkG2TMYYNUjjLc992bps
+1LhE6sCTuGae42Axu1L1ZB7L96yi9irEBE
+1FRoHA9xewq7DjrZ1psWJVeTer8gHRqEvR
+187swFMjz1G54ycVU56B7jZFHFTNVQFDiu
+1PWABE7oUahG2AFFQhhvViQovnCr4rEv7Q
+1PWCx5fovoEaoBowAvF5k91m2Xat9bMgwb
+1Be2UF9NLfyLFbtm3TCbmuocc9N1Kduci1
+14iXhn8bGajVWegZHJ18vJLHhntcpL4dex
+1HBtApAFA9B2YZw3G2YKSMCtb3dVnjuNe2
+122AJhKLEfkFBaGAd84pLp1kfE7xK3GdT8
+1EeAxcprB2PpCnr34VfZdFrkUWuxyiNEFv
+1L5sU9qvJeuwQUdt4y1eiLmquFxKjtHr3E
+1E32GPWgDyeyQac4aJxm9HVoLrrEYPnM4N
+1PiFuqGpG8yGM5v6rNHWS3TjsG6awgEGA1
+1CkR2uS7LmFwc3T2jV8C1BhWb5mQaoxedF
+1NtiLNGegHWE3Mp9g2JPkgx6wUg4TW7bbk
+1F3JRMWudBaj48EhwcHDdpeuy2jwACNxjP
+1Pd8VvT49sHKsmqrQiP61RsVwmXCZ6ay7Z
+1DFYhaB2J9q1LLZJWKTnscPWos9VBqDHzv
+12CiUhYVTTH33w3SPUBqcpMoqnApAV4WCF
+1MEzite4ReNuWaL5Ds17ePKt2dCxWEofwk
+1NpnQyZ7x24ud82b7WiRNvPm6N8bqGQnaS
+15z9c9sVpu6fwNiK7dMAFgMYSK4GqsGZim
+15K1YKJMiJ4fpesTVUcByoz334rHmknxmT
+1KYUv7nSvXx4642TKeuC2SNdTk326uUpFy
+1LzhS3k3e9Ub8i2W1V8xQFdB8n2MYCHPCa
+17aPYR1m6pVAacXg1PTDDU7XafvK1dxvhi
+15c9mPGLku1HuW9LRtBf4jcHVpBUt8txKz
+1Dn8NF8qDyyfHMktmuoQLGyjWmZXgvosXf
+1HAX2n9Uruu9YDt4cqRgYcvtGvZj1rbUyt
+1Kn5h2qpgw9mWE5jKpk8PP4qvvJ1QVy8su
+1AVJKwzs9AskraJLGHAZPiaZcrpDr1U6AB
+1Me6EfpwZK5kQziBwBfvLiHjaPGxCKLoJi
+1NpYjtLira16LfGbGwZJ5JbDPh3ai9bjf4
+16jY7qLJnxb7CHZyqBP8qca9d51gAjyXQN
+18ZMbwUFLMHoZBbfpCjUJQTCMCbktshgpe
+13zb1hQbWVsc2S7ZTZnP2G4undNNpdh5so
+1BY8GQbnueYofwSuFAT3USAhGjPrkxDdW9
+1MVDYgVaSN6iKKEsbzRUAYFrYJadLYZvvZ
+19vkiEajfhuZ8bs8Zu2jgmC6oqZbWqhxhG
+19YZECXj3SxEZMoUeJ1yiPsw8xANe7M7QR
+1PWo3JeB9jrGwfHDNpdGK54CRas7fsVzXU
+1JTK7s9YVYywfm5XUH7RNhHJH1LshCaRFR
+12VVRNPi4SJqUTsp6FmqDqY5sGosDtysn4
+1FWGcVDK3JGzCC3WtkYetULPszMaK2Jksv
+1J36UjUByGroXcCvmj13U6uwaVv9caEeAt
+1DJh2eHFYQfACPmrvpyWc8MSTYKh7w9eRF
+1Bxk4CQdqL9p22JEtDfdXMsng1XacifUtE
+15qF6X51huDjqTmF9BJgxXdt1xcj46Jmhb
+1ARk8HWJMn8js8tQmGUJeQHjSE7KRkn2t8
+1BCf6rHUW6m3iH2ptsvnjgLruAiPQQepLe
+15qsCm78whspNQFydGJQk5rexzxTQopnHZ
+13zYrYhhJxp6Ui1VV7pqa5WDhNWM45ARAC
+14MdEb4eFcT3MVG5sPFG4jGLuHJSnt1Dk2
+1CMq3SvFcVEcpLMuuH8PUcNiqsK1oicG2D
+1Kh22PvXERd2xpTQk3ur6pPEqFeckCJfAr
+1K3x5L6G57Y494fDqBfrojD28UJv4s5JcK
+1PxH3K1Shdjb7gSEoTX7UPDZ6SH4qGPrvq
+16AbnZjZZipwHMkYKBSfswGWKDmXHjEpSf
+19QciEHbGVNY4hrhfKXmcBBCrJSBZ6TaVt
+1L12FHH2FHjvTviyanuiFVfmzCy46RRATU
+1EzVHtmbN4fs4MiNk3ppEnKKhsmXYJ4s74
+1AE8NzzgKE7Yhz7BWtAcAAxiFMbPo82NB5
+17Q7tuG2JwFFU9rXVj3uZqRtioH3mx2Jad
+1K6xGMUbs6ZTXBnhw1pippqwK6wjBWtNpL
+19eVSDuizydXxhohGh8Ki9WY9KsHdSwoQC
+15ANYzzCp5BFHcCnVFzXqyibpzgPLWaD8b
+18ywPwj39nGjqBrQJSzZVq2izR12MDpDr8
+1CaBVPrwUxbQYYswu32w7Mj4HR4maNoJSX
+1JWnE6p6UN7ZJBN7TtcbNDoRcjFtuDWoNL
+1KCgMv8fo2TPBpddVi9jqmMmcne9uSNJ5F
+1CKCVdbDJasYmhswB6HKZHEAnNaDpK7W4n
+1PXv28YxmYMaB8zxrKeZBW8dt2HK7RkRPX
+1AcAmB6jmtU6AiEcXkmiNE9TNVPsj9DULf
+1EQJvpsmhazYCcKX5Au6AZmZKRnzarMVZu
+1CMjscKB3QW7SDyQ4c3C3DEUHiHRhiZVib
+18KsfuHuzQaBTNLASyj15hy4LuqPUo1FNB
+15EJFC5ZTs9nhsdvSUeBXjLAuYq3SWaxTc
+1HB1iKUqeffnVsvQsbpC6dNi1XKbyNuqao
+1GvgAXVCbA8FBjXfWiAms4ytFeJcKsoyhL
+12JzYkkN76xkwvcPT6AWKZtGX6w2LAgsJg
+1824ZJQ7nKJ9QFTRBqn7z7dHV5EGpzUpH3
+18A7NA9FTsnJxWgkoFfPAFbQzuQxpRtCos
+1NeGn21dUDDeqFQ63xb2SpgUuXuBLA4WT4
+174SNxfqpdMGYy5YQcfLbSTK3MRNZEePoy
+1NLbHuJebVwUZ1XqDjsAyfTRUPwDQbemfv
+1MnJ6hdhvK37VLmqcdEwqC3iFxyWH2PHUV
+1KNRfGWw7Q9Rmwsc6NT5zsdvEb9M2Wkj5Z
+1PJZPzvGX19a7twf5HyD2VvNiPdHLzm9F6
+1GuBBhf61rnvRe4K8zu8vdQB3kHzwFqSy7
+17s2b9ksz5y7abUm92cHwG8jEPCzK3dLnT
+1GDSuiThEV64c166LUFC9uDcVdGjqkxKyh
+1Me3ASYt5JCTAK2XaC32RMeH34PdprrfDx
+1CdufMQL892A69KXgv6UNBD17ywWqYpKut
+1BkkGsX9ZM6iwL3zbqs7HWBV7SvosR6m8N
+1PXAyUB8ZoH3WD8n5zoAthYjN15yN5CVq5
+1AWCLZAjKbV1P7AHvaPNCKiB7ZWVDMxFiz
+1G6EFyBRU86sThN3SSt3GrHu1sA7w7nzi4
+1MZ2L1gFrCtkkn6DnTT2e4PFUTHw9gNwaj
+1Hz3uv3nNZzBVMXLGadCucgjiCs5W9vaGz
+1Fo65aKq8s8iquMt6weF1rku1moWVEd5Ua
+16zRPnT8znwq42q7XeMkZUhb1bKqgRogyy
+1KrU4dHE5WrW8rhWDsTRjR21r8t3dsrS3R
+17uDfp5r4n441xkgLFmhNoSW1KWp6xVLD
+13A3JrvXmvg5w9XGvyyR4JEJqiLz8ZySY3
+16RGFo6hjq9ym6Pj7N5H7L1NR1rVPJyw2v
+1UDHPdovvR985NrWSkdWQDEQ1xuRiTALq
+15nf31J46iLuK1ZkTnqHo7WgN5cARFK3RA
+1Ab4vzG6wEQBDNQM1B2bvUz4fqXXdFk2WT
+1Fz63c775VV9fNyj25d9Xfw3YHE6sKCxbt
+1QKBaU6WAeycb3DbKbLBkX7vJiaS8r42Xo
+1CD91Vm97mLQvXhrnoMChhJx4TP9MaQkJo
+15MnK2jXPqTMURX4xC3h4mAZxyCcaWWEDD
+13N66gCzWWHEZBxhVxG18P8wyjEWF9Yoi1
+1NevxKDYuDcCh1ZMMi6ftmWwGrZKC6j7Ux
+19GpszRNUej5yYqxXoLnbZWKew3KdVLkXg
+1M7ipcdYHey2Y5RZM34MBbpugghmjaV89P
+18aNhurEAJsw6BAgtANpexk5ob1aGTwSeL
+1FwZXt6EpRT7Fkndzv6K4b4DFoT4trbMrV
+1CXvTzR6qv8wJ7eprzUKeWxyGcHwDYP1i2
+1MUJSJYtGPVGkBCTqGspnxyHahpt5Te8jy
+13Q84TNNvgcL3HJiqQPvyBb9m4hxjS3jkV
+1LuUHyrQr8PKSvbcY1v1PiuGuqFjWpDumN
+18192XpzzdDi2K11QVHR7td2HcPS6Qs5vg
+1NgVmsCCJaKLzGyKLFJfVequnFW9ZvnMLN
+1AoeP37TmHdFh8uN72fu9AqgtLrUwcv2wJ
+1FTpAbQa4h8trvhQXjXnmNhqdiGBd1oraE
+14JHoRAdmJg3XR4RjMDh6Wed6ft6hzbQe9
+19z6waranEf8CcP8FqNgdwUe1QRxvUNKBG
+14u4nA5sugaswb6SZgn5av2vuChdMnD9E5
+1NBC8uXJy1GiJ6drkiZa1WuKn51ps7EPTv
diff --git a/clKeyFinder/clKeyFinder.vcxproj b/clKeyFinder/clKeyFinder.vcxproj
index c73ca1a..874c500 100644
--- a/clKeyFinder/clKeyFinder.vcxproj
+++ b/clKeyFinder/clKeyFinder.vcxproj
@@ -5,6 +5,14 @@
       <Configuration>Debug</Configuration>
       <Platform>Win32</Platform>
     </ProjectConfiguration>
+    <ProjectConfiguration Include="Performance Release|Win32">
+      <Configuration>Performance Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Performance Release|x64">
+      <Configuration>Performance Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
     <ProjectConfiguration Include="Release|Win32">
       <Configuration>Release</Configuration>
       <Platform>Win32</Platform>
@@ -22,32 +30,46 @@
     <VCProjectVersion>15.0</VCProjectVersion>
     <ProjectGuid>{36400E8D-3D04-430C-90A4-FC989E460B3C}</ProjectGuid>
     <RootNamespace>clKeyFinder</RootNamespace>
-    <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
+    <WindowsTargetPlatformVersion>10.0.19041.0</WindowsTargetPlatformVersion>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
+    <PlatformToolset>ClangCl</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>ClangCl</PlatformToolset>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
+    <PlatformToolset>ClangCl</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v142</PlatformToolset>
+    <PlatformToolset>ClangCl</PlatformToolset>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>ClangCl</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PreferredToolArchitecture>x64</PreferredToolArchitecture>
+  </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v142</PlatformToolset>
+    <PlatformToolset>ClangCL</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
@@ -60,6 +82,10 @@
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
     <Import Project="..\BitCrack.props" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="..\BitCrack.props" />
+  </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
     <Import Project="..\BitCrack.props" />
@@ -68,6 +94,10 @@
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
     <Import Project="..\BitCrack.props" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="..\BitCrack.props" />
+  </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
     <Import Project="..\BitCrack.props" />
@@ -78,6 +108,11 @@
     <CustomBuildAfterTargets>Build</CustomBuildAfterTargets>
     <PostBuildEventUseInBuild>false</PostBuildEventUseInBuild>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|x64'">
+    <TargetName>clBitCrack</TargetName>
+    <CustomBuildAfterTargets>Build</CustomBuildAfterTargets>
+    <PostBuildEventUseInBuild>false</PostBuildEventUseInBuild>
+  </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <TargetName>clBitCrack</TargetName>
     <CustomBuildAfterTargets>Build</CustomBuildAfterTargets>
@@ -95,6 +130,48 @@
     <Link>
       <AdditionalDependencies>OpenCL.lib;Shlwapi.lib;BCrypt.lib;%(AdditionalDependencies)</AdditionalDependencies>
       <AdditionalLibraryDirectories>$(OPENCL_LIB);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <SubSystem>Console</SubSystem>
+    </Link>
+    <CustomBuildStep>
+      <Command>
+      </Command>
+    </CustomBuildStep>
+    <CustomBuildStep>
+      <Message>
+      </Message>
+      <Outputs>
+      </Outputs>
+      <TreatOutputAsContent>true</TreatOutputAsContent>
+    </CustomBuildStep>
+    <PostBuildEvent>
+      <Command>
+      </Command>
+    </PostBuildEvent>
+    <PostBuildEvent>
+      <Message>
+      </Message>
+    </PostBuildEvent>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <SDLCheck>true</SDLCheck>
+      <ConformanceMode>true</ConformanceMode>
+      <AdditionalIncludeDirectories>$(SolutionDir)\KeyFinderLib;$(SolutionDir)\AddressUtil;$(SolutionDir)\Logger;$(SolutionDir)\CmdParse;$(SolutionDir)\CLKeySearchDevice;$(SolutionDir)\secp256k1lib;$(SolutionDir)\util;$(SolutionDir)\clUtil;$(OPENCL_INCLUDE);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;BUILD_OPENCL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <DebugInformationFormat>None</DebugInformationFormat>
+      <Optimization>MaxSpeed</Optimization>
+      <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>OpenCL.lib;Shlwapi.lib;BCrypt.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalLibraryDirectories>$(OPENCL_LIB);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <SubSystem>Console</SubSystem>
     </Link>
     <CustomBuildStep>
       <Command>
@@ -124,6 +201,14 @@
       <ConformanceMode>true</ConformanceMode>
     </ClCompile>
   </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <SDLCheck>true</SDLCheck>
+      <ConformanceMode>true</ConformanceMode>
+    </ClCompile>
+  </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <ClCompile>
       <WarningLevel>Level3</WarningLevel>
@@ -154,6 +239,7 @@
       <OptimizeReferences>true</OptimizeReferences>
       <AdditionalDependencies>OpenCL.lib;Shlwapi.lib;BCrypt.lib;%(AdditionalDependencies)</AdditionalDependencies>
       <AdditionalLibraryDirectories>$(OPENCL_LIB);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <SubSystem>Console</SubSystem>
     </Link>
     <CustomBuildStep>
       <Command>
diff --git a/clMath/clMath.vcxproj b/clMath/clMath.vcxproj
index 5236bff..c434979 100644
--- a/clMath/clMath.vcxproj
+++ b/clMath/clMath.vcxproj
@@ -5,6 +5,14 @@
       <Configuration>Debug</Configuration>
       <Platform>Win32</Platform>
     </ProjectConfiguration>
+    <ProjectConfiguration Include="Performance Release|Win32">
+      <Configuration>Performance Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Performance Release|x64">
+      <Configuration>Performance Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
     <ProjectConfiguration Include="Release|Win32">
       <Configuration>Release</Configuration>
       <Platform>Win32</Platform>
@@ -22,32 +30,46 @@
     <VCProjectVersion>15.0</VCProjectVersion>
     <ProjectGuid>{83327841-C283-4D46-A873-97AC674C68AC}</ProjectGuid>
     <RootNamespace>clMath</RootNamespace>
-    <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
+    <WindowsTargetPlatformVersion>10.0.19041.0</WindowsTargetPlatformVersion>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
+    <PlatformToolset>ClangCl</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>ClangCl</PlatformToolset>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
+    <PlatformToolset>ClangCl</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v142</PlatformToolset>
+    <PlatformToolset>ClangCl</PlatformToolset>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>ClangCl</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PreferredToolArchitecture>x64</PreferredToolArchitecture>
+  </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v142</PlatformToolset>
+    <PlatformToolset>ClangCL</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
@@ -59,12 +81,18 @@
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
@@ -92,7 +120,7 @@
       <ConformanceMode>true</ConformanceMode>
     </ClCompile>
   </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|Win32'">
     <ClCompile>
       <WarningLevel>Level3</WarningLevel>
       <Optimization>Disabled</Optimization>
@@ -100,6 +128,28 @@
       <ConformanceMode>true</ConformanceMode>
     </ClCompile>
   </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <WarningLevel>EnableAllWarnings</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <SDLCheck>true</SDLCheck>
+      <ConformanceMode>true</ConformanceMode>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <SDLCheck>true</SDLCheck>
+      <ConformanceMode>true</ConformanceMode>
+      <DebugInformationFormat>None</DebugInformationFormat> 
+      <Optimization>MaxSpeed</Optimization>
+      <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+    </ClCompile>
+  </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <ClCompile>
       <WarningLevel>Level3</WarningLevel>
diff --git a/clMath/ripemd160.cl b/clMath/ripemd160.cl
index 300ead1..97bd624 100644
--- a/clMath/ripemd160.cl
+++ b/clMath/ripemd160.cl
@@ -1,24 +1,17 @@
-#ifndef _RIPEMD160_CL
-#define _RIPEMD160_CL
+#ifndef RIPEMD160_CL
+#define RIPEMD160_CL
 
+#ifndef endian /* endian(x): reverse the byte order of a 32-bit unsigned value; an earlier definition of endian, if any, is kept */
+#define endian(x) (((x) << 24) | (((x) << 8) & 0x00ff0000) | (((x) >> 8) & 0x0000ff00) | ((x) >> 24)) /* outer parentheses keep the expansion a single operand next to &, ^, +, == */
+#endif /* endian */
 
-__constant unsigned int _RIPEMD160_IV[5] = {
-    0x67452301,
-    0xefcdab89,
-    0x98badcfe,
-    0x10325476,
-    0xc3d2e1f0
+__constant unsigned int RIPEMD160_IV[5] = {
+    0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0,
 };
 
-__constant unsigned int _K0 = 0x5a827999;
-__constant unsigned int _K1 = 0x6ed9eba1;
-__constant unsigned int _K2 = 0x8f1bbcdc;
-__constant unsigned int _K3 = 0xa953fd4e;
-
-__constant unsigned int _K4 = 0x7a6d76e9;
-__constant unsigned int _K5 = 0x6d703ef3;
-__constant unsigned int _K6 = 0x5c4dd124;
-__constant unsigned int _K7 = 0x50a28be6;
+__constant unsigned int K[8] = {
+    0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xa953fd4e, 0x7a6d76e9, 0x6d703ef3, 0x5c4dd124, 0x50a28be6
+};
 
 #define rotl(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
 
@@ -38,22 +31,22 @@ __constant unsigned int _K7 = 0x50a28be6;
     c = rotl((c), 10)
 
 #define GG(a, b, c, d, e, x, s)\
-    a += G((b), (c), (d)) + (x) + _K0;\
+    a += G((b), (c), (d)) + (x) + K[0];\
     a = rotl((a), (s)) + (e);\
     c = rotl((c), 10)
 
 #define HH(a, b, c, d, e, x, s)\
-    a += H((b), (c), (d)) + (x) + _K1;\
+    a += H((b), (c), (d)) + (x) + K[1];\
     a = rotl((a), (s)) + (e);\
     c = rotl((c), 10)
 
 #define II(a, b, c, d, e, x, s)\
-    a += I((b), (c), (d)) + (x) + _K2;\
+    a += I((b), (c), (d)) + (x) + K[2];\
     a = rotl((a), (s)) + e;\
     c = rotl((c), 10)
 
 #define JJ(a, b, c, d, e, x, s)\
-    a += J((b), (c), (d)) + (x) + _K3;\
+    a += J((b), (c), (d)) + (x) + K[3];\
     a = rotl((a), (s)) + (e);\
     c = rotl((c), 10)
 
@@ -63,432 +56,257 @@ __constant unsigned int _K7 = 0x50a28be6;
     c = rotl((c), 10)
 
 #define GGG(a, b, c, d, e, x, s)\
-    a += G((b), (c), (d)) + x + _K4;\
+    a += G((b), (c), (d)) + x + K[4];\
     a = rotl((a), (s)) + (e);\
     c = rotl((c), 10)
 
 #define HHH(a, b, c, d, e, x, s)\
-    a += H((b), (c), (d)) + (x) + _K5;\
+    a += H((b), (c), (d)) + (x) + K[5];\
     a = rotl((a), (s)) + (e);\
     c = rotl((c), 10)
 
 #define III(a, b, c, d, e, x, s)\
-    a += I((b), (c), (d)) + (x) + _K6;\
+    a += I((b), (c), (d)) + (x) + K[6];\
     a = rotl((a), (s)) + (e);\
     c = rotl((c), 10)
 
 #define JJJ(a, b, c, d, e, x, s)\
-    a += J((b), (c), (d)) + (x) + _K7;\
+    a += J((b), (c), (d)) + (x) + K[7];\
     a = rotl((a), (s)) + (e);\
     c = rotl((c), 10)
 
-
-void ripemd160sha256(const unsigned int x[8], unsigned int digest[5])
+void ripemd160p1(const unsigned int x[8], unsigned int digest[5])
 {
-    unsigned int a1 = _RIPEMD160_IV[0];
-    unsigned int b1 = _RIPEMD160_IV[1];
-    unsigned int c1 = _RIPEMD160_IV[2];
-    unsigned int d1 = _RIPEMD160_IV[3];
-    unsigned int e1 = _RIPEMD160_IV[4];
-
-    const unsigned int x8 = 0x00000080;
-    const unsigned int x14 = 256;
+    __private unsigned int a = RIPEMD160_IV[0];
+    __private unsigned int b = RIPEMD160_IV[1];
+    __private unsigned int c = RIPEMD160_IV[2];
+    __private unsigned int d = RIPEMD160_IV[3];
+    __private unsigned int e = RIPEMD160_IV[4];
 
     /* round 1 */
-    FF(a1, b1, c1, d1, e1, x[0], 11);
-    FF(e1, a1, b1, c1, d1, x[1], 14);
-    FF(d1, e1, a1, b1, c1, x[2], 15);
-    FF(c1, d1, e1, a1, b1, x[3], 12);
-    FF(b1, c1, d1, e1, a1, x[4], 5);
-    FF(a1, b1, c1, d1, e1, x[5], 8);
-    FF(e1, a1, b1, c1, d1, x[6], 7);
-    FF(d1, e1, a1, b1, c1, x[7], 9);
-    FF(c1, d1, e1, a1, b1, x8, 11);
-    FF(b1, c1, d1, e1, a1, 0, 13);
-    FF(a1, b1, c1, d1, e1, 0, 14);
-    FF(e1, a1, b1, c1, d1, 0, 15);
-    FF(d1, e1, a1, b1, c1, 0, 6);
-    FF(c1, d1, e1, a1, b1, 0, 7);
-    FF(b1, c1, d1, e1, a1, x14, 9);
-    FF(a1, b1, c1, d1, e1, 0, 8);
+    FF(a, b, c, d, e, x[0], 11);
+    FF(e, a, b, c, d, x[1], 14);
+    FF(d, e, a, b, c, x[2], 15);
+    FF(c, d, e, a, b, x[3], 12);
+    FF(b, c, d, e, a, x[4], 5);
+    FF(a, b, c, d, e, x[5], 8);
+    FF(e, a, b, c, d, x[6], 7);
+    FF(d, e, a, b, c, x[7], 9);
+    FF(c, d, e, a, b, 128, 11);
+    FF(b, c, d, e, a, 0, 13);
+    FF(a, b, c, d, e, 0, 14);
+    FF(e, a, b, c, d, 0, 15);
+    FF(d, e, a, b, c, 0, 6);
+    FF(c, d, e, a, b, 0, 7);
+    FF(b, c, d, e, a, 256, 9);
+    FF(a, b, c, d, e, 0, 8);
 
     /* round 2 */
-    GG(e1, a1, b1, c1, d1, x[7], 7);
-    GG(d1, e1, a1, b1, c1, x[4], 6);
-    GG(c1, d1, e1, a1, b1, 0, 8);
-    GG(b1, c1, d1, e1, a1, x[1], 13);
-    GG(a1, b1, c1, d1, e1, 0, 11);
-    GG(e1, a1, b1, c1, d1, x[6], 9);
-    GG(d1, e1, a1, b1, c1, 0, 7);
-    GG(c1, d1, e1, a1, b1, x[3], 15);
-    GG(b1, c1, d1, e1, a1, 0, 7);
-    GG(a1, b1, c1, d1, e1, x[0], 12);
-    GG(e1, a1, b1, c1, d1, 0, 15);
-    GG(d1, e1, a1, b1, c1, x[5], 9);
-    GG(c1, d1, e1, a1, b1, x[2], 11);
-    GG(b1, c1, d1, e1, a1, x14, 7);
-    GG(a1, b1, c1, d1, e1, 0, 13);
-    GG(e1, a1, b1, c1, d1, x8, 12);
+    GG(e, a, b, c, d, x[7], 7);
+    GG(d, e, a, b, c, x[4], 6);
+    GG(c, d, e, a, b, 0, 8);
+    GG(b, c, d, e, a, x[1], 13);
+    GG(a, b, c, d, e, 0, 11);
+    GG(e, a, b, c, d, x[6], 9);
+    GG(d, e, a, b, c, 0, 7);
+    GG(c, d, e, a, b, x[3], 15);
+    GG(b, c, d, e, a, 0, 7);
+    GG(a, b, c, d, e, x[0], 12);
+    GG(e, a, b, c, d, 0, 15);
+    GG(d, e, a, b, c, x[5], 9);
+    GG(c, d, e, a, b, x[2], 11);
+    GG(b, c, d, e, a, 256, 7);
+    GG(a, b, c, d, e, 0, 13);
+    GG(e, a, b, c, d, 0x80, 12);
 
     /* round 3 */
-    HH(d1, e1, a1, b1, c1, x[3], 11);
-    HH(c1, d1, e1, a1, b1, 0, 13);
-    HH(b1, c1, d1, e1, a1, x14, 6);
-    HH(a1, b1, c1, d1, e1, x[4], 7);
-    HH(e1, a1, b1, c1, d1, 0, 14);
-    HH(d1, e1, a1, b1, c1, 0, 9);
-    HH(c1, d1, e1, a1, b1, x8, 13);
-    HH(b1, c1, d1, e1, a1, x[1], 15);
-    HH(a1, b1, c1, d1, e1, x[2], 14);
-    HH(e1, a1, b1, c1, d1, x[7], 8);
-    HH(d1, e1, a1, b1, c1, x[0], 13);
-    HH(c1, d1, e1, a1, b1, x[6], 6);
-    HH(b1, c1, d1, e1, a1, 0, 5);
-    HH(a1, b1, c1, d1, e1, 0, 12);
-    HH(e1, a1, b1, c1, d1, x[5], 7);
-    HH(d1, e1, a1, b1, c1, 0, 5);
+    HH(d, e, a, b, c, x[3], 11);
+    HH(c, d, e, a, b, 0, 13);
+    HH(b, c, d, e, a, 256, 6);
+    HH(a, b, c, d, e, x[4], 7);
+    HH(e, a, b, c, d, 0, 14);
+    HH(d, e, a, b, c, 0, 9);
+    HH(c, d, e, a, b, 0x80, 13);
+    HH(b, c, d, e, a, x[1], 15);
+    HH(a, b, c, d, e, x[2], 14);
+    HH(e, a, b, c, d, x[7], 8);
+    HH(d, e, a, b, c, x[0], 13);
+    HH(c, d, e, a, b, x[6], 6);
+    HH(b, c, d, e, a, 0, 5);
+    HH(a, b, c, d, e, 0, 12);
+    HH(e, a, b, c, d, x[5], 7);
+    HH(d, e, a, b, c, 0, 5);
 
     /* round 4 */
-    II(c1, d1, e1, a1, b1, x[1], 11);
-    II(b1, c1, d1, e1, a1, 0, 12);
-    II(a1, b1, c1, d1, e1, 0, 14);
-    II(e1, a1, b1, c1, d1, 0, 15);
-    II(d1, e1, a1, b1, c1, x[0], 14);
-    II(c1, d1, e1, a1, b1, x8, 15);
-    II(b1, c1, d1, e1, a1, 0, 9);
-    II(a1, b1, c1, d1, e1, x[4], 8);
-    II(e1, a1, b1, c1, d1, 0, 9);
-    II(d1, e1, a1, b1, c1, x[3], 14);
-    II(c1, d1, e1, a1, b1, x[7], 5);
-    II(b1, c1, d1, e1, a1, 0, 6);
-    II(a1, b1, c1, d1, e1, x14, 8);
-    II(e1, a1, b1, c1, d1, x[5], 6);
-    II(d1, e1, a1, b1, c1, x[6], 5);
-    II(c1, d1, e1, a1, b1, x[2], 12);
+    II(c, d, e, a, b, x[1], 11);
+    II(b, c, d, e, a, 0, 12);
+    II(a, b, c, d, e, 0, 14);
+    II(e, a, b, c, d, 0, 15);
+    II(d, e, a, b, c, x[0], 14);
+    II(c, d, e, a, b, 0x80, 15);
+    II(b, c, d, e, a, 0, 9);
+    II(a, b, c, d, e, x[4], 8);
+    II(e, a, b, c, d, 0, 9);
+    II(d, e, a, b, c, x[3], 14);
+    II(c, d, e, a, b, x[7], 5);
+    II(b, c, d, e, a, 0, 6);
+    II(a, b, c, d, e, 256, 8);
+    II(e, a, b, c, d, x[5], 6);
+    II(d, e, a, b, c, x[6], 5);
+    II(c, d, e, a, b, x[2], 12);
 
     /* round 5 */
-    JJ(b1, c1, d1, e1, a1, x[4], 9);
-    JJ(a1, b1, c1, d1, e1, x[0], 15);
-    JJ(e1, a1, b1, c1, d1, x[5], 5);
-    JJ(d1, e1, a1, b1, c1, 0, 11);
-    JJ(c1, d1, e1, a1, b1, x[7], 6);
-    JJ(b1, c1, d1, e1, a1, 0, 8);
-    JJ(a1, b1, c1, d1, e1, x[2], 13);
-    JJ(e1, a1, b1, c1, d1, 0, 12);
-    JJ(d1, e1, a1, b1, c1, x14, 5);
-    JJ(c1, d1, e1, a1, b1, x[1], 12);
-    JJ(b1, c1, d1, e1, a1, x[3], 13);
-    JJ(a1, b1, c1, d1, e1, x8, 14);
-    JJ(e1, a1, b1, c1, d1, 0, 11);
-    JJ(d1, e1, a1, b1, c1, x[6], 8);
-    JJ(c1, d1, e1, a1, b1, 0, 5);
-    JJ(b1, c1, d1, e1, a1, 0, 6);
-
-    unsigned int a2 = _RIPEMD160_IV[0];
-    unsigned int b2 = _RIPEMD160_IV[1];
-    unsigned int c2 = _RIPEMD160_IV[2];
-    unsigned int d2 = _RIPEMD160_IV[3];
-    unsigned int e2 = _RIPEMD160_IV[4];
+    JJ(b, c, d, e, a, x[4], 9);
+    JJ(a, b, c, d, e, x[0], 15);
+    JJ(e, a, b, c, d, x[5], 5);
+    JJ(d, e, a, b, c, 0, 11);
+    JJ(c, d, e, a, b, x[7], 6);
+    JJ(b, c, d, e, a, 0, 8);
+    JJ(a, b, c, d, e, x[2], 13);
+    JJ(e, a, b, c, d, 0, 12);
+    JJ(d, e, a, b, c, 256, 5);
+    JJ(c, d, e, a, b, x[1], 12);
+    JJ(b, c, d, e, a, x[3], 13);
+    JJ(a, b, c, d, e, 0x80, 14);
+    JJ(e, a, b, c, d, 0, 11);
+    JJ(d, e, a, b, c, x[6], 8);
+    JJ(c, d, e, a, b, 0, 5);
+    JJ(b, c, d, e, a, 0, 6);
+
+    digest[0] = c;
+    digest[1] = d;
+    digest[2] = e;
+    digest[3] = a;
+    digest[4] = b;
+}
+
+void ripemd160p2(const unsigned int x[8], unsigned int digest[5])
+{
+    __private unsigned int a = RIPEMD160_IV[0];
+    __private unsigned int b = RIPEMD160_IV[1];
+    __private unsigned int c = RIPEMD160_IV[2];
+    __private unsigned int d = RIPEMD160_IV[3];
+    __private unsigned int e = RIPEMD160_IV[4];
 
     /* parallel round 1 */
-    JJJ(a2, b2, c2, d2, e2, x[5], 8);
-    JJJ(e2, a2, b2, c2, d2, x14, 9);
-    JJJ(d2, e2, a2, b2, c2, x[7], 9);
-    JJJ(c2, d2, e2, a2, b2, x[0], 11);
-    JJJ(b2, c2, d2, e2, a2, 0, 13);
-    JJJ(a2, b2, c2, d2, e2, x[2], 15);
-    JJJ(e2, a2, b2, c2, d2, 0, 15);
-    JJJ(d2, e2, a2, b2, c2, x[4], 5);
-    JJJ(c2, d2, e2, a2, b2, 0, 7);
-    JJJ(b2, c2, d2, e2, a2, x[6], 7);
-    JJJ(a2, b2, c2, d2, e2, 0, 8);
-    JJJ(e2, a2, b2, c2, d2, x8, 11);
-    JJJ(d2, e2, a2, b2, c2, x[1], 14);
-    JJJ(c2, d2, e2, a2, b2, 0, 14);
-    JJJ(b2, c2, d2, e2, a2, x[3], 12);
-    JJJ(a2, b2, c2, d2, e2, 0, 6);
+    JJJ(a, b, c, d, e, x[5], 8);
+    JJJ(e, a, b, c, d, 256, 9);
+    JJJ(d, e, a, b, c, x[7], 9);
+    JJJ(c, d, e, a, b, x[0], 11);
+    JJJ(b, c, d, e, a, 0, 13);
+    JJJ(a, b, c, d, e, x[2], 15);
+    JJJ(e, a, b, c, d, 0, 15);
+    JJJ(d, e, a, b, c, x[4], 5);
+    JJJ(c, d, e, a, b, 0, 7);
+    JJJ(b, c, d, e, a, x[6], 7);
+    JJJ(a, b, c, d, e, 0, 8);
+    JJJ(e, a, b, c, d, 0x80, 11);
+    JJJ(d, e, a, b, c, x[1], 14);
+    JJJ(c, d, e, a, b, 0, 14);
+    JJJ(b, c, d, e, a, x[3], 12);
+    JJJ(a, b, c, d, e, 0, 6);
 
     /* parallel round 2 */
-    III(e2, a2, b2, c2, d2, x[6], 9);
-    III(d2, e2, a2, b2, c2, 0, 13);
-    III(c2, d2, e2, a2, b2, x[3], 15);
-    III(b2, c2, d2, e2, a2, x[7], 7);
-    III(a2, b2, c2, d2, e2, x[0], 12);
-    III(e2, a2, b2, c2, d2, 0, 8);
-    III(d2, e2, a2, b2, c2, x[5], 9);
-    III(c2, d2, e2, a2, b2, 0, 11);
-    III(b2, c2, d2, e2, a2, x14, 7);
-    III(a2, b2, c2, d2, e2, 0, 7);
-    III(e2, a2, b2, c2, d2, x8, 12);
-    III(d2, e2, a2, b2, c2, 0, 7);
-    III(c2, d2, e2, a2, b2, x[4], 6);
-    III(b2, c2, d2, e2, a2, 0, 15);
-    III(a2, b2, c2, d2, e2, x[1], 13);
-    III(e2, a2, b2, c2, d2, x[2], 11);
+    III(e, a, b, c, d, x[6], 9);
+    III(d, e, a, b, c, 0, 13);
+    III(c, d, e, a, b, x[3], 15);
+    III(b, c, d, e, a, x[7], 7);
+    III(a, b, c, d, e, x[0], 12);
+    III(e, a, b, c, d, 0, 8);
+    III(d, e, a, b, c, x[5], 9);
+    III(c, d, e, a, b, 0, 11);
+    III(b, c, d, e, a, 256, 7);
+    III(a, b, c, d, e, 0, 7);
+    III(e, a, b, c, d, 0x80, 12);
+    III(d, e, a, b, c, 0, 7);
+    III(c, d, e, a, b, x[4], 6);
+    III(b, c, d, e, a, 0, 15);
+    III(a, b, c, d, e, x[1], 13);
+    III(e, a, b, c, d, x[2], 11);
 
     /* parallel round 3 */
-    HHH(d2, e2, a2, b2, c2, 0, 9);
-    HHH(c2, d2, e2, a2, b2, x[5], 7);
-    HHH(b2, c2, d2, e2, a2, x[1], 15);
-    HHH(a2, b2, c2, d2, e2, x[3], 11);
-    HHH(e2, a2, b2, c2, d2, x[7], 8);
-    HHH(d2, e2, a2, b2, c2, x14, 6);
-    HHH(c2, d2, e2, a2, b2, x[6], 6);
-    HHH(b2, c2, d2, e2, a2, 0, 14);
-    HHH(a2, b2, c2, d2, e2, 0, 12);
-    HHH(e2, a2, b2, c2, d2, x8, 13);
-    HHH(d2, e2, a2, b2, c2, 0, 5);
-    HHH(c2, d2, e2, a2, b2, x[2], 14);
-    HHH(b2, c2, d2, e2, a2, 0, 13);
-    HHH(a2, b2, c2, d2, e2, x[0], 13);
-    HHH(e2, a2, b2, c2, d2, x[4], 7);
-    HHH(d2, e2, a2, b2, c2, 0, 5);
+    HHH(d, e, a, b, c, 0, 9);
+    HHH(c, d, e, a, b, x[5], 7);
+    HHH(b, c, d, e, a, x[1], 15);
+    HHH(a, b, c, d, e, x[3], 11);
+    HHH(e, a, b, c, d, x[7], 8);
+    HHH(d, e, a, b, c, 256, 6);
+    HHH(c, d, e, a, b, x[6], 6);
+    HHH(b, c, d, e, a, 0, 14);
+    HHH(a, b, c, d, e, 0, 12);
+    HHH(e, a, b, c, d, 0x80, 13);
+    HHH(d, e, a, b, c, 0, 5);
+    HHH(c, d, e, a, b, x[2], 14);
+    HHH(b, c, d, e, a, 0, 13);
+    HHH(a, b, c, d, e, x[0], 13);
+    HHH(e, a, b, c, d, x[4], 7);
+    HHH(d, e, a, b, c, 0, 5);
 
     /* parallel round 4 */
-    GGG(c2, d2, e2, a2, b2, x8, 15);
-    GGG(b2, c2, d2, e2, a2, x[6], 5);
-    GGG(a2, b2, c2, d2, e2, x[4], 8);
-    GGG(e2, a2, b2, c2, d2, x[1], 11);
-    GGG(d2, e2, a2, b2, c2, x[3], 14);
-    GGG(c2, d2, e2, a2, b2, 0, 14);
-    GGG(b2, c2, d2, e2, a2, 0, 6);
-    GGG(a2, b2, c2, d2, e2, x[0], 14);
-    GGG(e2, a2, b2, c2, d2, x[5], 6);
-    GGG(d2, e2, a2, b2, c2, 0, 9);
-    GGG(c2, d2, e2, a2, b2, x[2], 12);
-    GGG(b2, c2, d2, e2, a2, 0, 9);
-    GGG(a2, b2, c2, d2, e2, 0, 12);
-    GGG(e2, a2, b2, c2, d2, x[7], 5);
-    GGG(d2, e2, a2, b2, c2, 0, 15);
-    GGG(c2, d2, e2, a2, b2, x14, 8);
+    GGG(c, d, e, a, b, 0x80, 15);
+    GGG(b, c, d, e, a, x[6], 5);
+    GGG(a, b, c, d, e, x[4], 8);
+    GGG(e, a, b, c, d, x[1], 11);
+    GGG(d, e, a, b, c, x[3], 14);
+    GGG(c, d, e, a, b, 0, 14);
+    GGG(b, c, d, e, a, 0, 6);
+    GGG(a, b, c, d, e, x[0], 14);
+    GGG(e, a, b, c, d, x[5], 6);
+    GGG(d, e, a, b, c, 0, 9);
+    GGG(c, d, e, a, b, x[2], 12);
+    GGG(b, c, d, e, a, 0, 9);
+    GGG(a, b, c, d, e, 0, 12);
+    GGG(e, a, b, c, d, x[7], 5);
+    GGG(d, e, a, b, c, 0, 15);
+    GGG(c, d, e, a, b, 256, 8);
 
     /* parallel round 5 */
-    FFF(b2, c2, d2, e2, a2, 0, 8);
-    FFF(a2, b2, c2, d2, e2, 0, 5);
-    FFF(e2, a2, b2, c2, d2, 0, 12);
-    FFF(d2, e2, a2, b2, c2, x[4], 9);
-    FFF(c2, d2, e2, a2, b2, x[1], 12);
-    FFF(b2, c2, d2, e2, a2, x[5], 5);
-    FFF(a2, b2, c2, d2, e2, x8, 14);
-    FFF(e2, a2, b2, c2, d2, x[7], 6);
-    FFF(d2, e2, a2, b2, c2, x[6], 8);
-    FFF(c2, d2, e2, a2, b2, x[2], 13);
-    FFF(b2, c2, d2, e2, a2, 0, 6);
-    FFF(a2, b2, c2, d2, e2, x14, 5);
-    FFF(e2, a2, b2, c2, d2, x[0], 15);
-    FFF(d2, e2, a2, b2, c2, x[3], 13);
-    FFF(c2, d2, e2, a2, b2, 0, 11);
-    FFF(b2, c2, d2, e2, a2, 0, 11);
-
-    digest[0] = _RIPEMD160_IV[1] + c1 + d2;
-    digest[1] = _RIPEMD160_IV[2] + d1 + e2;
-    digest[2] = _RIPEMD160_IV[3] + e1 + a2;
-    digest[3] = _RIPEMD160_IV[4] + a1 + b2;
-    digest[4] = _RIPEMD160_IV[0] + b1 + c2;
+    FFF(b, c, d, e, a, 0, 8);
+    FFF(a, b, c, d, e, 0, 5);
+    FFF(e, a, b, c, d, 0, 12);
+    FFF(d, e, a, b, c, x[4], 9);
+    FFF(c, d, e, a, b, x[1], 12);
+    FFF(b, c, d, e, a, x[5], 5);
+    FFF(a, b, c, d, e, 0x80, 14);
+    FFF(e, a, b, c, d, x[7], 6);
+    FFF(d, e, a, b, c, x[6], 8);
+    FFF(c, d, e, a, b, x[2], 13);
+    FFF(b, c, d, e, a, 0, 6);
+    FFF(a, b, c, d, e, 256, 5);
+    FFF(e, a, b, c, d, x[0], 15);
+    FFF(d, e, a, b, c, x[3], 13);
+    FFF(c, d, e, a, b, 0, 11);
+    FFF(b, c, d, e, a, 0, 11);
+
+    digest[0] = d;
+    digest[1] = e;
+    digest[2] = a;
+    digest[3] = b;
+    digest[4] = c;
 }
 
-
 void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5])
 {
-    unsigned int a1 = _RIPEMD160_IV[0];
-    unsigned int b1 = _RIPEMD160_IV[1];
-    unsigned int c1 = _RIPEMD160_IV[2];
-    unsigned int d1 = _RIPEMD160_IV[3];
-    unsigned int e1 = _RIPEMD160_IV[4];
-
-    const unsigned int x8 = 0x00000080;
-    const unsigned int x14 = 256;
-
-    /* round 1 */
-    FF(a1, b1, c1, d1, e1, x[0], 11);
-    FF(e1, a1, b1, c1, d1, x[1], 14);
-    FF(d1, e1, a1, b1, c1, x[2], 15);
-    FF(c1, d1, e1, a1, b1, x[3], 12);
-    FF(b1, c1, d1, e1, a1, x[4], 5);
-    FF(a1, b1, c1, d1, e1, x[5], 8);
-    FF(e1, a1, b1, c1, d1, x[6], 7);
-    FF(d1, e1, a1, b1, c1, x[7], 9);
-    FF(c1, d1, e1, a1, b1, x8, 11);
-    FF(b1, c1, d1, e1, a1, 0, 13);
-    FF(a1, b1, c1, d1, e1, 0, 14);
-    FF(e1, a1, b1, c1, d1, 0, 15);
-    FF(d1, e1, a1, b1, c1, 0, 6);
-    FF(c1, d1, e1, a1, b1, 0, 7);
-    FF(b1, c1, d1, e1, a1, x14, 9);
-    FF(a1, b1, c1, d1, e1, 0, 8);
-
-    /* round 2 */
-    GG(e1, a1, b1, c1, d1, x[7], 7);
-    GG(d1, e1, a1, b1, c1, x[4], 6);
-    GG(c1, d1, e1, a1, b1, 0, 8);
-    GG(b1, c1, d1, e1, a1, x[1], 13);
-    GG(a1, b1, c1, d1, e1, 0, 11);
-    GG(e1, a1, b1, c1, d1, x[6], 9);
-    GG(d1, e1, a1, b1, c1, 0, 7);
-    GG(c1, d1, e1, a1, b1, x[3], 15);
-    GG(b1, c1, d1, e1, a1, 0, 7);
-    GG(a1, b1, c1, d1, e1, x[0], 12);
-    GG(e1, a1, b1, c1, d1, 0, 15);
-    GG(d1, e1, a1, b1, c1, x[5], 9);
-    GG(c1, d1, e1, a1, b1, x[2], 11);
-    GG(b1, c1, d1, e1, a1, x14, 7);
-    GG(a1, b1, c1, d1, e1, 0, 13);
-    GG(e1, a1, b1, c1, d1, x8, 12);
-
-    /* round 3 */
-    HH(d1, e1, a1, b1, c1, x[3], 11);
-    HH(c1, d1, e1, a1, b1, 0, 13);
-    HH(b1, c1, d1, e1, a1, x14, 6);
-    HH(a1, b1, c1, d1, e1, x[4], 7);
-    HH(e1, a1, b1, c1, d1, 0, 14);
-    HH(d1, e1, a1, b1, c1, 0, 9);
-    HH(c1, d1, e1, a1, b1, x8, 13);
-    HH(b1, c1, d1, e1, a1, x[1], 15);
-    HH(a1, b1, c1, d1, e1, x[2], 14);
-    HH(e1, a1, b1, c1, d1, x[7], 8);
-    HH(d1, e1, a1, b1, c1, x[0], 13);
-    HH(c1, d1, e1, a1, b1, x[6], 6);
-    HH(b1, c1, d1, e1, a1, 0, 5);
-    HH(a1, b1, c1, d1, e1, 0, 12);
-    HH(e1, a1, b1, c1, d1, x[5], 7);
-    HH(d1, e1, a1, b1, c1, 0, 5);
-
-    /* round 4 */
-    II(c1, d1, e1, a1, b1, x[1], 11);
-    II(b1, c1, d1, e1, a1, 0, 12);
-    II(a1, b1, c1, d1, e1, 0, 14);
-    II(e1, a1, b1, c1, d1, 0, 15);
-    II(d1, e1, a1, b1, c1, x[0], 14);
-    II(c1, d1, e1, a1, b1, x8, 15);
-    II(b1, c1, d1, e1, a1, 0, 9);
-    II(a1, b1, c1, d1, e1, x[4], 8);
-    II(e1, a1, b1, c1, d1, 0, 9);
-    II(d1, e1, a1, b1, c1, x[3], 14);
-    II(c1, d1, e1, a1, b1, x[7], 5);
-    II(b1, c1, d1, e1, a1, 0, 6);
-    II(a1, b1, c1, d1, e1, x14, 8);
-    II(e1, a1, b1, c1, d1, x[5], 6);
-    II(d1, e1, a1, b1, c1, x[6], 5);
-    II(c1, d1, e1, a1, b1, x[2], 12);
-
-    /* round 5 */
-    JJ(b1, c1, d1, e1, a1, x[4], 9);
-    JJ(a1, b1, c1, d1, e1, x[0], 15);
-    JJ(e1, a1, b1, c1, d1, x[5], 5);
-    JJ(d1, e1, a1, b1, c1, 0, 11);
-    JJ(c1, d1, e1, a1, b1, x[7], 6);
-    JJ(b1, c1, d1, e1, a1, 0, 8);
-    JJ(a1, b1, c1, d1, e1, x[2], 13);
-    JJ(e1, a1, b1, c1, d1, 0, 12);
-    JJ(d1, e1, a1, b1, c1, x14, 5);
-    JJ(c1, d1, e1, a1, b1, x[1], 12);
-    JJ(b1, c1, d1, e1, a1, x[3], 13);
-    JJ(a1, b1, c1, d1, e1, x8, 14);
-    JJ(e1, a1, b1, c1, d1, 0, 11);
-    JJ(d1, e1, a1, b1, c1, x[6], 8);
-    JJ(c1, d1, e1, a1, b1, 0, 5);
-    JJ(b1, c1, d1, e1, a1, 0, 6);
-
-    unsigned int a2 = _RIPEMD160_IV[0];
-    unsigned int b2 = _RIPEMD160_IV[1];
-    unsigned int c2 = _RIPEMD160_IV[2];
-    unsigned int d2 = _RIPEMD160_IV[3];
-    unsigned int e2 = _RIPEMD160_IV[4];
-
-    /* parallel round 1 */
-    JJJ(a2, b2, c2, d2, e2, x[5], 8);
-    JJJ(e2, a2, b2, c2, d2, x14, 9);
-    JJJ(d2, e2, a2, b2, c2, x[7], 9);
-    JJJ(c2, d2, e2, a2, b2, x[0], 11);
-    JJJ(b2, c2, d2, e2, a2, 0, 13);
-    JJJ(a2, b2, c2, d2, e2, x[2], 15);
-    JJJ(e2, a2, b2, c2, d2, 0, 15);
-    JJJ(d2, e2, a2, b2, c2, x[4], 5);
-    JJJ(c2, d2, e2, a2, b2, 0, 7);
-    JJJ(b2, c2, d2, e2, a2, x[6], 7);
-    JJJ(a2, b2, c2, d2, e2, 0, 8);
-    JJJ(e2, a2, b2, c2, d2, x8, 11);
-    JJJ(d2, e2, a2, b2, c2, x[1], 14);
-    JJJ(c2, d2, e2, a2, b2, 0, 14);
-    JJJ(b2, c2, d2, e2, a2, x[3], 12);
-    JJJ(a2, b2, c2, d2, e2, 0, 6);
+    __private unsigned int digest1[5];
+    __private unsigned int digest2[5];
 
-    /* parallel round 2 */
-    III(e2, a2, b2, c2, d2, x[6], 9);
-    III(d2, e2, a2, b2, c2, 0, 13);
-    III(c2, d2, e2, a2, b2, x[3], 15);
-    III(b2, c2, d2, e2, a2, x[7], 7);
-    III(a2, b2, c2, d2, e2, x[0], 12);
-    III(e2, a2, b2, c2, d2, 0, 8);
-    III(d2, e2, a2, b2, c2, x[5], 9);
-    III(c2, d2, e2, a2, b2, 0, 11);
-    III(b2, c2, d2, e2, a2, x14, 7);
-    III(a2, b2, c2, d2, e2, 0, 7);
-    III(e2, a2, b2, c2, d2, x8, 12);
-    III(d2, e2, a2, b2, c2, 0, 7);
-    III(c2, d2, e2, a2, b2, x[4], 6);
-    III(b2, c2, d2, e2, a2, 0, 15);
-    III(a2, b2, c2, d2, e2, x[1], 13);
-    III(e2, a2, b2, c2, d2, x[2], 11);
+    ripemd160p1(x, digest1);
+    ripemd160p2(x, digest2);
 
-    /* parallel round 3 */
-    HHH(d2, e2, a2, b2, c2, 0, 9);
-    HHH(c2, d2, e2, a2, b2, x[5], 7);
-    HHH(b2, c2, d2, e2, a2, x[1], 15);
-    HHH(a2, b2, c2, d2, e2, x[3], 11);
-    HHH(e2, a2, b2, c2, d2, x[7], 8);
-    HHH(d2, e2, a2, b2, c2, x14, 6);
-    HHH(c2, d2, e2, a2, b2, x[6], 6);
-    HHH(b2, c2, d2, e2, a2, 0, 14);
-    HHH(a2, b2, c2, d2, e2, 0, 12);
-    HHH(e2, a2, b2, c2, d2, x8, 13);
-    HHH(d2, e2, a2, b2, c2, 0, 5);
-    HHH(c2, d2, e2, a2, b2, x[2], 14);
-    HHH(b2, c2, d2, e2, a2, 0, 13);
-    HHH(a2, b2, c2, d2, e2, x[0], 13);
-    HHH(e2, a2, b2, c2, d2, x[4], 7);
-    HHH(d2, e2, a2, b2, c2, 0, 5);
-
-    /* parallel round 4 */
-    GGG(c2, d2, e2, a2, b2, x8, 15);
-    GGG(b2, c2, d2, e2, a2, x[6], 5);
-    GGG(a2, b2, c2, d2, e2, x[4], 8);
-    GGG(e2, a2, b2, c2, d2, x[1], 11);
-    GGG(d2, e2, a2, b2, c2, x[3], 14);
-    GGG(c2, d2, e2, a2, b2, 0, 14);
-    GGG(b2, c2, d2, e2, a2, 0, 6);
-    GGG(a2, b2, c2, d2, e2, x[0], 14);
-    GGG(e2, a2, b2, c2, d2, x[5], 6);
-    GGG(d2, e2, a2, b2, c2, 0, 9);
-    GGG(c2, d2, e2, a2, b2, x[2], 12);
-    GGG(b2, c2, d2, e2, a2, 0, 9);
-    GGG(a2, b2, c2, d2, e2, 0, 12);
-    GGG(e2, a2, b2, c2, d2, x[7], 5);
-    GGG(d2, e2, a2, b2, c2, 0, 15);
-    GGG(c2, d2, e2, a2, b2, x14, 8);
+    digest[0] = digest1[0] + digest2[0];
+    digest[1] = digest1[1] + digest2[1];
+    digest[2] = digest1[2] + digest2[2];
+    digest[3] = digest1[3] + digest2[3];
+    digest[4] = digest1[4] + digest2[4];
+}
 
-    /* parallel round 5 */
-    FFF(b2, c2, d2, e2, a2, 0, 8);
-    FFF(a2, b2, c2, d2, e2, 0, 5);
-    FFF(e2, a2, b2, c2, d2, 0, 12);
-    FFF(d2, e2, a2, b2, c2, x[4], 9);
-    FFF(c2, d2, e2, a2, b2, x[1], 12);
-    FFF(b2, c2, d2, e2, a2, x[5], 5);
-    FFF(a2, b2, c2, d2, e2, x8, 14);
-    FFF(e2, a2, b2, c2, d2, x[7], 6);
-    FFF(d2, e2, a2, b2, c2, x[6], 8);
-    FFF(c2, d2, e2, a2, b2, x[2], 13);
-    FFF(b2, c2, d2, e2, a2, 0, 6);
-    FFF(a2, b2, c2, d2, e2, x14, 5);
-    FFF(e2, a2, b2, c2, d2, x[0], 15);
-    FFF(d2, e2, a2, b2, c2, x[3], 13);
-    FFF(c2, d2, e2, a2, b2, 0, 11);
-    FFF(b2, c2, d2, e2, a2, 0, 11);
-
-    digest[0] = c1 + d2;
-    digest[1] = d1 + e2;
-    digest[2] = e1 + a2;
-    digest[3] = a1 + b2;
-    digest[4] = b1 + c2;
+void ripemd160FinalRound(const unsigned int hIn[5], unsigned int hOut[5])
+{
+    // hOut[i] = endian(hIn[i] + IV[(i + 1) mod 5]); the IV index is rotated by one word.
+    // NOTE(review): endian() and RIPEMD160_IV are declared outside this hunk - confirm their definitions.
+    for(int i = 0; i < 5; i++) {
+        hOut[i] = endian(hIn[i] + RIPEMD160_IV[(i + 1) % 5]);
+    }
 }
+
 #endif
diff --git a/clMath/secp256k1.cl b/clMath/secp256k1.cl
index 2a63f3f..95c5556 100644
--- a/clMath/secp256k1.cl
+++ b/clMath/secp256k1.cl
@@ -1,91 +1,41 @@
-#ifndef _SECP256K1_CL
-#define _SECP256K1_CL
-
-typedef ulong uint64_t;
-
-typedef struct {
-    uint v[8];
-}uint256_t;
+#ifndef SECP256K1_CL
+#define SECP256K1_CL
 
+typedef struct uint256_t {  // eight unsigned int words wrapped in a struct
+    unsigned int v[8];      // NOTE(review): presumably v[0] is the most significant word, like the P/N/GX/GY tables below - confirm
+} uint256_t;
 
 /**
- Prime modulus 2^256 - 2^32 - 977
+ * Base point X
  */
-__constant unsigned int _P[8] = {
-    0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFC2F
-};
-
-__constant unsigned int _P_MINUS1[8] = {
-    0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFC2F
-};
-
-/**
- Base point X
- */
-__constant unsigned int _GX[8] = {
+__constant unsigned int GX[8] = {
     0x79BE667E, 0xF9DCBBAC, 0x55A06295, 0xCE870B07, 0x029BFCDB, 0x2DCE28D9, 0x59F2815B, 0x16F81798
 };
 
 /**
- Base point Y
+ * Base point Y
  */
-__constant unsigned int _GY[8] = {
+__constant unsigned int GY[8] = {
     0x483ADA77, 0x26A3C465, 0x5DA4FBFC, 0x0E1108A8, 0xFD17B448, 0xA6855419, 0x9C47D08F, 0xFB10D4B8
 };
 
-
 /**
  * Group order
  */
-__constant unsigned int _N[8] = {
+__constant unsigned int N[8] = {
     0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xBAAEDCE6, 0xAF48A03B, 0xBFD25E8C, 0xD0364141
 };
 
-__constant unsigned int _INFINITY[8] = {
-    0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
+/**
+ * Prime modulus 2^256 - 2^32 - 977
+ */
+__constant unsigned int P[8] = {
+    0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFC2F
 };
 
-void printBigInt(const unsigned int x[8])
-{
-    printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
-        x[0], x[1], x[2], x[3],
-        x[4], x[5], x[6], x[7]);
-}
-
-// Add with carry
-unsigned int addc(unsigned int a, unsigned int b, unsigned int *carry)
-{
-    unsigned int sum = a + *carry;
-
-    unsigned int c1 = (sum < a) ? 1 : 0;
-
-    sum = sum + b;
-    
-    unsigned int c2 = (sum < b) ? 1 : 0;
-
-    *carry = c1 | c2;
-
-    return sum;
-}
-
-// Subtract with borrow
-unsigned int subc(unsigned int a, unsigned int b, unsigned int *borrow)
-{
-    unsigned int diff = a - *borrow;
-
-    *borrow = (diff > a) ? 1 : 0;
-
-    unsigned int diff2 = diff - b;
-
-    *borrow |= (diff2 > diff) ? 1 : 0;
-
-    return diff2;
-}
-
 #ifdef DEVICE_VENDOR_INTEL
-
 // Intel devices have a mul_hi bug
-unsigned int mul_hi977(unsigned int x)
+inline unsigned int mul_hi977(unsigned int x)
 {
     unsigned int high = x >> 16;
     unsigned int low = x & 0xffff;
@@ -94,657 +44,668 @@ unsigned int mul_hi977(unsigned int x)
 }
 
 // 32 x 32 multiply-add
-void madd977(unsigned int *high, unsigned int *low, unsigned int a, unsigned int c)
+inline void madd977(unsigned int *high, unsigned int *low, unsigned int *a, unsigned int *c)
 {
-    *low = a * 977;
-    unsigned int tmp = *low + c;
+    *low = *a * 977;
+    unsigned int tmp = *low + *c;
     unsigned int carry = tmp < *low ? 1 : 0;
     *low = tmp;
-    *high = mul_hi977(a) + carry;
+    *high = mul_hi977(*a) + carry;
 }
-
 #else
 
-// 32 x 32 multiply-add
-void madd977(unsigned int *high, unsigned int *low, unsigned int a, unsigned int c)
+inline void madd977(unsigned int *high, unsigned int *low, unsigned int *a, unsigned int *c)
 {
-    *low = a * 977;
-    unsigned int tmp = *low + c;
+    *low = *a * 977;
+    unsigned int tmp = *low + *c;
     unsigned int carry = tmp < *low ? 1 : 0;
     *low = tmp;
-    *high = mad_hi(a, (unsigned int)977, carry);
+    *high = mad_hi(*a, (unsigned int)977, carry);
 }
 
 #endif
 
-// 32 x 32 multiply-add
-void madd(unsigned int *high, unsigned int *low, unsigned int a, unsigned int b, unsigned int c)
-{
-    *low = a * b;
-    unsigned int tmp = *low + c;
-    unsigned int carry = tmp < *low ? 1 : 0;
-    *low = tmp;
-    *high = mad_hi(a, b, carry);
-}
-
-void mull(unsigned int *high, unsigned int *low, unsigned int a, unsigned int b)
-{
-    *low = a * b;
-    *high = mul_hi(a, b);
-}
-
-
-uint256_t sub256k(uint256_t a, uint256_t b, unsigned int* borrow_ptr)
-{
-    unsigned int borrow = 0;
-    uint256_t c;
-
-    for(int i = 7; i >= 0; i--) {
-        c.v[i] = subc(a.v[i], b.v[i], &borrow);
-    }
-
-    *borrow_ptr = borrow;
-
-    return c;
-}
-
-bool greaterThanEqualToP(const unsigned int a[8])
-{
-    for(int i = 0; i < 8; i++) {
-        if(a[i] > _P_MINUS1[i]) {
-            return true;
-        } else if(a[i] < _P_MINUS1[i]) {
-            return false;
-        }
-    }
-
-    return true;
-}
+// Add with carry: sum = a + b + carry, carry = carry-out (0 or 1). sum may alias a or b; tmp is scratch. Expands to several statements.
+#define addc(a, b, sum, carry, tmp)      \
+    tmp = (a) + (carry);                 \
+    carry = ((tmp) < (a)) ? 1 : 0;       \
+    sum = (tmp) + (b);                   \
+    carry |= ((sum) < (tmp)) ? 1 : 0;
+
+// Subtract with borrow: diff = lhs - rhs - borrow, borrow = borrow-out (0 or 1). scratch holds lhs - borrow. Expands to several statements.
+#define subc(lhs, rhs, diff, borrow, scratch)    \
+    scratch = (lhs) - (borrow);                  \
+    borrow = ((lhs) < (scratch)) ? 1 : 0;        \
+    diff = (scratch) - (rhs);                    \
+    borrow |= ((scratch) < (diff)) ? 1 : 0;
+// c = a + b over eight words, index 7 first (least significant); carry is the carry-in on entry and the carry-out on exit.
+#define add256k(a, b, c, carry, tmp)          \
+    addc((a)[7], (b)[7], (c)[7], carry, tmp); \
+    addc((a)[6], (b)[6], (c)[6], carry, tmp); \
+    addc((a)[5], (b)[5], (c)[5], carry, tmp); \
+    addc((a)[4], (b)[4], (c)[4], carry, tmp); \
+    addc((a)[3], (b)[3], (c)[3], carry, tmp); \
+    addc((a)[2], (b)[2], (c)[2], carry, tmp); \
+    addc((a)[1], (b)[1], (c)[1], carry, tmp); \
+    addc((a)[0], (b)[0], (c)[0], carry, tmp);
+// c = a - b over eight words, index 7 first (least significant); borrow is the borrow-in on entry and the borrow-out on exit.
+#define sub256k(a, b, c, borrow, tmp)          \
+    subc((a)[7], (b)[7], (c)[7], borrow, tmp); \
+    subc((a)[6], (b)[6], (c)[6], borrow, tmp); \
+    subc((a)[5], (b)[5], (c)[5], borrow, tmp); \
+    subc((a)[4], (b)[4], (c)[4], borrow, tmp); \
+    subc((a)[3], (b)[3], (c)[3], borrow, tmp); \
+    subc((a)[2], (b)[2], (c)[2], borrow, tmp); \
+    subc((a)[1], (b)[1], (c)[1], borrow, tmp); \
+    subc((a)[0], (b)[0], (c)[0], borrow, tmp);
+// Non-zero when all eight words of a are 0xffffffff (the all-ones pattern this file tests as "infinity"). Evaluates a eight times.
+#define isInfinity256k(a)          \
+    (                              \
+        ((a)[0] == 0xffffffff) && \
+        ((a)[1] == 0xffffffff) && \
+        ((a)[2] == 0xffffffff) && \
+        ((a)[3] == 0xffffffff) && \
+        ((a)[4] == 0xffffffff) && \
+        ((a)[5] == 0xffffffff) && \
+        ((a)[6] == 0xffffffff) && \
+        ((a)[7] == 0xffffffff)    \
+    )
+// Non-zero when the 256-bit value a (a[0] most significant) is >= P. Relies on P[0..5] all being 0xffffffff, so only a[6], a[7] need ordering.
+#define greaterOrEqualToP(a)                                                      \
+    ((((a)[0] & (a)[1] & (a)[2] & (a)[3] & (a)[4] & (a)[5]) == 0xffffffff) &&    \
+     (((a)[6] > P[6]) || (((a)[6] == P[6]) && ((a)[7] >= P[7]))))
+// Non-zero when a and b hold the same value in all eight words. Evaluates each argument eight times.
+#define equal256k(a, b)           \
+    (                             \
+        ((a)[0] == (b)[0]) &&    \
+        ((a)[1] == (b)[1]) &&    \
+        ((a)[2] == (b)[2]) &&    \
+        ((a)[3] == (b)[3]) &&    \
+        ((a)[4] == (b)[4]) &&    \
+        ((a)[5] == (b)[5]) &&    \
+        ((a)[6] == (b)[6]) &&    \
+        ((a)[7] == (b)[7]))
 
 void multiply256(const unsigned int x[8], const unsigned int y[8], unsigned int out_high[8], unsigned int out_low[8])
 {
-    unsigned int z[16];
-
-    unsigned int high = 0;
+    __private unsigned long product;
 
     // First round, overwrite z
-    for(int j = 7; j >= 0; j--) {
-
-        uint64_t product = (uint64_t)x[7] * y[j];
-
-        product = product + high;
-
-        z[7 + j + 1] = (unsigned int)product;
-        high = (unsigned int)(product >> 32);
-    }
-    z[7] = high;
-
-    for(int i = 6; i >= 0; i--) {
-
-        high = 0;
-
-        for(int j = 7; j >= 0; j--) {
-
-            uint64_t product = (uint64_t)x[i] * y[j];
-
-            product = product + z[i + j + 1] + high;
+    product = (unsigned long)x[7] * y[7];
+    out_low[7] = (unsigned int)product;
+    
+    product = (unsigned long)x[7] * y[6] + (unsigned int)(product >> 32);
+    out_low[6] = (unsigned int)product;
+    
+    product = (unsigned long)x[7] * y[5] + (unsigned int)(product >> 32);
+    out_low[5] = (unsigned int)product;
+    
+    product = (unsigned long)x[7] * y[4] + (unsigned int)(product >> 32);
+    out_low[4] = (unsigned int)product;
+    
+    product = (unsigned long)x[7] * y[3] + (unsigned int)(product >> 32);
+    out_low[3] = (unsigned int)product;
+    
+    product = (unsigned long)x[7] * y[2] + (unsigned int)(product >> 32);
+    out_low[2] = (unsigned int)product;
+        
+    product = (unsigned long)x[7] * y[1] + (unsigned int)(product >> 32);
+    out_low[1] = (unsigned int)product;
+        
+    product = (unsigned long)x[7] * y[0] + (unsigned int)(product >> 32);
+    out_low[0] = (unsigned int)product;
+    out_high[7] = (unsigned int)(product >> 32);
+
+    product = (unsigned long)x[6] * y[7] + out_low[6];
+    out_low[6] = (unsigned int)product;
+
+    /** round6 */
+    product = (unsigned long)x[6] * y[6] + out_low[5] + (product >> 32);
+    out_low[5] = (unsigned int)product;
+
+    product = (unsigned long)x[6] * y[5] + out_low[4] + (product >> 32);
+    out_low[4] = (unsigned int)product;
+
+    product = (unsigned long)x[6] * y[4] + out_low[3] + (product >> 32);
+    out_low[3] = (unsigned int)product;
+
+    product = (unsigned long)x[6] * y[3] + out_low[2] + (product >> 32);
+    out_low[2] = (unsigned int)product;
+
+    product = (unsigned long)x[6] * y[2] + out_low[1] + (product >> 32);
+    out_low[1] = (unsigned int)product;
+    
+    product = (unsigned long)x[6] * y[1] + out_low[0] + (product >> 32);
+    out_low[0] = (unsigned int)product;
+    
+    product = (unsigned long)x[6] * y[0] + out_high[7] + (product >> 32);
+    out_high[7] = (unsigned int)product;
+    out_high[6] = product >> 32;
 
-            z[i + j + 1] = (unsigned int)product;
+    /** round 5 */
+    product = (unsigned long)x[5] * y[7] + out_low[5];
+    out_low[5] = (unsigned int)product;
 
-            high = product >> 32;
-        }
+    product = (unsigned long)x[5] * y[6] + out_low[4] + (product >> 32);
+    out_low[4] = (unsigned int)product;
 
-        z[i] = high;
-    }
+    product = (unsigned long)x[5] * y[5] + out_low[3] + (product >> 32);
+    out_low[3] = (unsigned int)product;
 
-    for(int i = 0; i < 8; i++) {
-        out_high[i] = z[i];
-        out_low[i] = z[8 + i];
-    }
-}
+    product = (unsigned long)x[5] * y[4] + out_low[2] + (product >> 32);
+    out_low[2] = (unsigned int)product;
 
+    product = (unsigned long)x[5] * y[3] + out_low[1] + (product >> 32);
+    out_low[1] = (unsigned int)product;
 
-unsigned int add256(const unsigned int a[8], const unsigned int b[8], unsigned int c[8])
-{
-    unsigned int carry = 0;
-
-    for(int i = 7; i >= 0; i--) {
-        c[i] = addc(a[i], b[i], &carry);
-    }
-
-    return carry;
-}
-
-uint256_t add256k(uint256_t a, uint256_t b, unsigned int* carry_ptr)
-{
-    uint256_t c;
-    unsigned int carry = 0;
+    product = (unsigned long)x[5] * y[2] + out_low[0] + (product >> 32);
+    out_low[0] = (unsigned int)product;
+    
+    product = (unsigned long)x[5] * y[1] + out_high[7] + (product >> 32);
+    out_high[7] = (unsigned int)product;
+    
+    product = (unsigned long)x[5] * y[0] + out_high[6] + (product >> 32);
+    out_high[6] = (unsigned int)product;
+    out_high[5] = product >> 32;
 
-    for(int i = 7; i >= 0; i--) {
-        c.v[i] = addc(a.v[i], b.v[i], &carry);
-    }
+    /** round 4 */
+    product = (unsigned long)x[4] * y[7] + out_low[4];
+    out_low[4] = (unsigned int)product;
 
-    *carry_ptr = carry;
+    product = (unsigned long)x[4] * y[6] + out_low[3] + (product >> 32);
+    out_low[3] = (unsigned int)product;
 
-    return c;
-}
+    product = (unsigned long)x[4] * y[5] + out_low[2] + (product >> 32);
+    out_low[2] = (unsigned int)product;
 
-bool isInfinity(const unsigned int x[8])
-{
-    bool isf = true;
+    product = (unsigned long)x[4] * y[4] + out_low[1] + (product >> 32);
+    out_low[1] = (unsigned int)product;
 
-    for(int i = 0; i < 8; i++) {
-        if(x[i] != 0xffffffff) {
-            isf = false;
-        }
-    }
+    product = (unsigned long)x[4] * y[3] + out_low[0] + (product >> 32);
+    out_low[0] = (unsigned int)product;
 
-    return isf;
-}
-
-bool isInfinity256k(const uint256_t x)
-{
-    bool isf = true;
+    product = (unsigned long)x[4] * y[2] + out_high[7] + (product >> 32);
+    out_high[7] = (unsigned int)product;
+    
+    product = (unsigned long)x[4] * y[1] + out_high[6] + (product >> 32);
+    out_high[6] = (unsigned int)product;
+    
+    product = (unsigned long)x[4] * y[0] + out_high[5] + (product >> 32);
+    out_high[5] = (unsigned int)product;
+    out_high[4] = product >> 32;
 
-    for(int i = 0; i < 8; i++) {
-        if(x.v[i] != 0xffffffff) {
-            isf = false;
-        }
-    }
+    /** round 3 */
+    product = (unsigned long)x[3] * y[7] + out_low[3];
+    out_low[3] = (unsigned int)product;
 
-    return isf;
-}
+    product = (unsigned long)x[3] * y[6] + out_low[2] + (product >> 32);
+    out_low[2] = (unsigned int)product;
 
-bool equal(const unsigned int a[8], const unsigned int b[8])
-{
-    for(int i = 0; i < 8; i++) {
-        if(a[i] != b[i]) {
-            return false;
-        }
-    }
+    product = (unsigned long)x[3] * y[5] + out_low[1] + (product >> 32);
+    out_low[1] = (unsigned int)product;
 
-    return true;
-}
+    product = (unsigned long)x[3] * y[4] + out_low[0] + (product >> 32);
+    out_low[0] = (unsigned int)product;
 
-bool equal256k(uint256_t a, uint256_t b)
-{
-    for(int i = 0; i < 8; i++) {
-        if(a.v[i] != b.v[i]) {
-            return false;
-        }
-    }
+    product = (unsigned long)x[3] * y[3] + out_high[7] + (product >> 32);
+    out_high[7] = (unsigned int)product;
 
-    return true;
-}
+    product = (unsigned long)x[3] * y[2] + out_high[6] + (product >> 32);
+    out_high[6] = (unsigned int)product;
+    
+    product = (unsigned long)x[3] * y[1] + out_high[5] + (product >> 32);
+    out_high[5] = (unsigned int)product;
+    
+    product = (unsigned long)x[3] * y[0] + out_high[4] + (product >> 32);
+    out_high[4] = (unsigned int)product;
+    out_high[3] = product >> 32;
 
-inline uint256_t readInt256(__global const uint256_t* ara, int idx)
-{
-    return ara[idx];
-}
+    /** round 2 */
+    product = (unsigned long)x[2] * y[7] + out_low[2];
+    out_low[2] = (unsigned int)product;
 
-/*
- * Read least-significant word
- */
-unsigned int readLSW(__global const unsigned int *ara, int idx)
-{
-    return ara[idx * 8 + 7];
-}
+    product = (unsigned long)x[2] * y[6] + out_low[1] + (product >> 32);
+    out_low[1] = (unsigned int)product;
 
-unsigned int readLSW256k(__global const uint256_t* ara, int idx)
-{
-    return ara[idx].v[7];
-}
+    product = (unsigned long)x[2] * y[5] + out_low[0] + (product >> 32);
+    out_low[0] = (unsigned int)product;
 
-unsigned int readWord256k(__global const uint256_t* ara, int idx, int word)
-{
-    return ara[idx].v[word];
-}
+    product = (unsigned long)x[2] * y[4] + out_high[7] + (product >> 32);
+    out_high[7] = (unsigned int)product;
 
-unsigned int addP(const unsigned int a[8], unsigned int c[8])
-{
-    unsigned int carry = 0;
+    product = (unsigned long)x[2] * y[3] + out_high[6] + (product >> 32);
+    out_high[6] = (unsigned int)product;
 
-    for(int i = 7; i >= 0; i--) {
-        c[i] = addc(a[i], _P[i], &carry);
-    }
+    product = (unsigned long)x[2] * y[2] + out_high[5] + (product >> 32);
+    out_high[5] = (unsigned int)product;
+    
+    product = (unsigned long)x[2] * y[1] + out_high[4] + (product >> 32);
+    out_high[4] = (unsigned int)product;
+    
+    product = (unsigned long)x[2] * y[0] + out_high[3] + (product >> 32);
+    out_high[3] = (unsigned int)product;
+    out_high[2] = product >> 32;
+    
+    /** round 1 */
+    product = (unsigned long)x[1] * y[7] + out_low[1];
+    out_low[1] = (unsigned int)product;
 
-    return carry;
-}
+    product = (unsigned long)x[1] * y[6] + out_low[0] + (product >> 32);
+    out_low[0] = (unsigned int)product;
 
-unsigned int subP(const unsigned int a[8], unsigned int c[8])
-{
-    unsigned int borrow = 0;
-    for(int i = 7; i >= 0; i--) {
-        c[i] = subc(a[i], _P[i], &borrow);
-    }
+    product = (unsigned long)x[1] * y[5] + out_high[7] + (product >> 32);
+    out_high[7] = (unsigned int)product;
 
-    return borrow;
-}
+    product = (unsigned long)x[1] * y[4] + out_high[6] + (product >> 32);
+    out_high[6] = (unsigned int)product;
 
-/**
- * Subtraction mod p
- */
-uint256_t subModP256k(uint256_t a, uint256_t b)
-{
-    unsigned int borrow = 0;
-    uint256_t c = sub256k(a, b, &borrow);
-    if(borrow) {
-        addP(c.v, c.v);
-    }
+    product = (unsigned long)x[1] * y[3] + out_high[5] + (product >> 32);
+    out_high[5] = (unsigned int)product;
 
-    return c;
-}
+    product = (unsigned long)x[1] * y[2] + out_high[4] + (product >> 32);
+    out_high[4] = (unsigned int)product;
+    
+    product = (unsigned long)x[1] * y[1] + out_high[3] + (product >> 32);
+    out_high[3] = (unsigned int)product;
+    
+    product = (unsigned long)x[1] * y[0] + out_high[2] + (product >> 32);
+    out_high[2] = (unsigned int)product;
+    out_high[1] = product >> 32;
 
+    /** round 0 */
+    product = (unsigned long)x[0] * y[7] + out_low[0];
+    out_low[0] = (unsigned int)product;
 
-uint256_t addModP256k(uint256_t a, uint256_t b)
-{
-    unsigned int carry = 0;
+    product = (unsigned long)x[0] * y[6] + out_high[7] + (product >> 32);
+    out_high[7] = (unsigned int)product;
 
-    uint256_t c = add256k(a, b, &carry);
+    product = (unsigned long)x[0] * y[5] + out_high[6] + (product >> 32);
+    out_high[6] = (unsigned int)product;
 
-    bool gt = false;
-    for(int i = 0; i < 8; i++) {
-        if(c.v[i] > _P[i]) {
-            gt = true;
-            break;
-        } else if(c.v[i] < _P[i]) {
-            break;
-        }
-    }
+    product = (unsigned long)x[0] * y[4] + out_high[5] + (product >> 32);
+    out_high[5] = (unsigned int)product;
 
-    if(carry || gt) {
-        subP(c.v, c.v);
-    }
+    product = (unsigned long)x[0] * y[3] + out_high[4] + (product >> 32);
+    out_high[4] = (unsigned int)product;
 
-    return c;
+    product = (unsigned long)x[0] * y[2] + out_high[3] + (product >> 32);
+    out_high[3] = (unsigned int)product;
+    
+    product = (unsigned long)x[0] * y[1] + out_high[2] + (product >> 32);
+    out_high[2] = (unsigned int)product;
+    
+    product = (unsigned long)x[0] * y[0] + out_high[1] + (product >> 32);
+    out_high[1] = (unsigned int)product;
+    out_high[0] = product >> 32;
 }
 
-
-void mulModP(const unsigned int a[8], const unsigned int b[8], unsigned int product_low[8])
+void mulModP(unsigned int a[8], unsigned int b[8], unsigned int product_low[8])
 {
-    unsigned int high[8];
+    __private unsigned int high[8];
+    __private unsigned int low[8];
 
-    unsigned int hWord = 0;
-    unsigned int carry = 0;
+    __private unsigned int hWord = 0;
+    __private unsigned int carry = 0;
+    __private unsigned int t = 0;
+    __private unsigned int product6 = 0;
+    __private unsigned int product7 = 0;
+    __private unsigned int tmp;
 
     // 256 x 256 multiply
-    multiply256(a, b, high, product_low);
+    multiply256(a, b, high, low);
+    product_low[7] = low[7];
+    product_low[6] = low[6];
+    product_low[5] = low[5];
+    product_low[4] = low[4];
+    product_low[3] = low[3];
+    product_low[2] = low[2];
+    product_low[1] = low[1];
+    product_low[0] = low[0];
 
     // Add 2^32 * high to the low 256 bits (shift left 1 word and add)
     // Affects product[14] to product[6]
-    for(int i = 6; i >= 0; i--) {
-        product_low[i] = addc(product_low[i], high[i + 1], &carry);
-    }
-    unsigned int product7 = addc(high[0], 0, &carry);
-    unsigned int product6 = carry;
+    addc(product_low[6], high[7], product_low[6], carry, tmp);
+    addc(product_low[5], high[6], product_low[5], carry, tmp);
+    addc(product_low[4], high[5], product_low[4], carry, tmp);
+    addc(product_low[3], high[4], product_low[3], carry, tmp);
+    addc(product_low[2], high[3], product_low[2], carry, tmp);
+    addc(product_low[1], high[2], product_low[1], carry, tmp);
+    addc(product_low[0], high[1], product_low[0], carry, tmp);
+
+    addc(high[0], 0, product7, carry, tmp);
+    product6 = carry;
 
     carry = 0;
 
     // Multiply high by 977 and add to low
     // Affects product[15] to product[5]
     for(int i = 7; i >= 0; i--) {
-        unsigned int t = 0;
-        madd977(&hWord, &t, high[i], hWord);
-        product_low[i] = addc(product_low[i], t, &carry);
+        madd977(&hWord, &t, &high[i], &hWord);
+        addc(product_low[i], t, product_low[i], carry, tmp);
+        t = 0;
     }
-    product7 = addc(product7, hWord, &carry);
-    product6 = addc(product6, 0, &carry);
+    addc(product7, hWord, high[7], carry, tmp);
+    addc(product6, 0, high[6], carry, tmp);
 
     // Multiply high 2 words by 2^32 and add to low
     // Affects product[14] to product[7]
     carry = 0;
-    high[7] = product7;
-    high[6] = product6;
 
-    product7 = 0;
-    product6 = 0;
+    addc(product_low[6], high[7], product_low[6], carry, tmp);
+    addc(product_low[5], high[6], product_low[5], carry, tmp);
 
-    product_low[6] = addc(product_low[6], high[7], &carry);
-    product_low[5] = addc(product_low[5], high[6], &carry);
-
-    // Propagate the carry
-    for(int i = 4; i >= 0; i--) {
-        product_low[i] = addc(product_low[i], 0, &carry);
-    }
-    product7 = carry;
+    addc(product_low[4], 0, product_low[4], carry, tmp);
+    addc(product_low[3], 0, product_low[3], carry, tmp);
+    addc(product_low[2], 0, product_low[2], carry, tmp);
+    addc(product_low[1], 0, product_low[1], carry, tmp);
+    addc(product_low[0], 0, product_low[0], carry, tmp);
 
     // Multiply top 2 words by 977 and add to low
     // Affects product[15] to product[7]
     carry = 0;
     hWord = 0;
-    unsigned int t = 0;
-    madd977(&hWord, &t, high[7], hWord);
-    product_low[7] = addc(product_low[7], t, &carry);
-    madd977(&hWord, &t, high[6], hWord);
-    product_low[6] = addc(product_low[6], t, &carry);
-    product_low[5] = addc(product_low[5], hWord, &carry);
-
+    madd977(&hWord, &t, &high[7], &hWord);
+    addc(product_low[7], t, product_low[7], carry, tmp);
+    madd977(&hWord, &t, &high[6], &hWord);
+    addc(product_low[6], t,  product_low[6], carry, tmp);
+    addc(product_low[5], hWord,  product_low[5], carry, tmp);
     // Propagate carry
-    for(int i = 4; i >= 0; i--) {
-        product_low[i] = addc(product_low[i], 0, &carry);
-    }
-    product7 = carry;
+    addc(product_low[4], 0, product_low[4], carry, tmp);
+    addc(product_low[3], 0, product_low[3], carry, tmp);
+    addc(product_low[2], 0, product_low[2], carry, tmp);
+    addc(product_low[1], 0, product_low[1], carry, tmp);
+    addc(product_low[0], 0, product_low[0], carry, tmp);
 
     // Reduce if >= P
-    if(product7 || greaterThanEqualToP(product_low)) {
-        subP(product_low, product_low);
+    if(carry || greaterOrEqualToP(product_low)) {
+        carry = 0;
+        sub256k(product_low, P, product_low, carry, tmp);
     }
 }
 
-uint256_t mulModP256k(uint256_t a, uint256_t b)
+/**
+ * Subtraction mod p
+ */
+void subModP256k(unsigned int a[8], unsigned int b[8], unsigned int c[8])
 {
-    uint256_t c;
+    __private unsigned int borrow = 0;
+    __private unsigned int tmp;
+    
+    sub256k(a, b, c, borrow, tmp);
+    
+    if (borrow) {
+        borrow = 0;
+        add256k(c, P, c, borrow, tmp);
+    }
+}
 
-    mulModP(a.v, b.v, c.v);
+/**
+ * Multiplicative inverse mod P using Fermat's method of x^(p-2) mod p and addition chains
+ */
+void invModP256k(unsigned int x[8])
+{
+    __private unsigned int y[8] = {0, 0, 0, 0, 0, 0, 0, 1};
+
+    mulModP(x, y, y);
+    mulModP(x, x, x);
+    mulModP(x, x, x);
+    mulModP(x, y, y);
+    mulModP(x, x, x);
+    mulModP(x, y, y);
+    mulModP(x, x, x);
+    mulModP(x, x, x);
+    mulModP(x, y, y);
+
+    for(int i = 0; i < 5; i++) {
+        mulModP(x, x, x);
+    }
 
-    return c;
-}
+    for(int i = 0; i < 22; i++) {
+        mulModP(x, y, y);
+        mulModP(x, x, x);
+    }
 
+    mulModP(x, x, x);
 
-uint256_t squareModP256k(uint256_t a)
-{
-    uint256_t b;
-    mulModP(a.v, a.v, b.v);
+    for(int i = 0; i < 222; i++) {
+        mulModP(x, y, y);
+        mulModP(x, x, x);
+    }
 
-    return b;
+    mulModP(x, y, x);
 }
 
-
-/**
- * Multiplicative inverse mod P using Fermat's method of x^(p-2) mod p and addition chains
- */
-uint256_t invModP256k(uint256_t value)
+void addModP256k(const unsigned int a[8], const unsigned int b[8], unsigned int c[8])
 {
-    uint256_t x = value;
-
-
-    //unsigned int y[8] = { 0, 0, 0, 0, 0, 0, 0, 1 };
-    uint256_t y = {{0, 0, 0, 0, 0, 0, 0, 1}};
-
-    // 0xd - 1101
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    //y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-
-    // 0x2 - 0010
-    //y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    //y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    //y = mulModP256k(x, y);
-    x = squareModP256k(x);
-
-    // 0xc = 0x1100
-    //y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    //y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-
-
-    // 0xfffff
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-
-
-    // 0xe - 1110
-    //y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    y = mulModP256k(x, y);
-    x = squareModP256k(x);
-    // 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffff
-    for(int i = 0; i < 219; i++) {
-        y = mulModP256k(x, y);
-        x = squareModP256k(x);
-    }
-    y = mulModP256k(x, y);
+    __private unsigned int borrow = 0;
+    __private unsigned int carry = 0;
+    __private unsigned int tmp = 0;
 
-    return y;
-}
+    add256k(a, b, c, carry, tmp);
+
+    if(carry) { sub256k(c, P, c, borrow, tmp); }
 
+    else if(c[0] > P[0]) { sub256k(c, P, c, borrow, tmp); } 
+    else if(c[0] < P[0]) {  }
 
-void beginBatchAdd256k(uint256_t px, uint256_t x, __global uint256_t* chain, int i, int batchIdx, uint256_t* inverse)
+    else if(c[1] > P[1]) { sub256k(c, P, c, borrow, tmp); } 
+    else if(c[1] < P[1]) {  }
+
+    else if(c[2] > P[2]) { sub256k(c, P, c, borrow, tmp); } 
+    else if(c[2] < P[2]) {  }
+    
+    else if(c[3] > P[3]) { sub256k(c, P, c, borrow, tmp); } 
+    else if(c[3] < P[3]) {  }
+    
+    else if(c[4] > P[4]) { sub256k(c, P, c, borrow, tmp); } 
+    else if(c[4] < P[4]) {  }
+    
+    else if(c[5] > P[5]) { sub256k(c, P, c, borrow, tmp); } 
+    else if(c[5] < P[5]) {  }
+    
+    else if(c[6] > P[6]) { sub256k(c, P, c, borrow, tmp); } 
+    else if(c[6] < P[6]) {  }
+
+    else if(c[7] >= P[7]) { sub256k(c, P, c, borrow, tmp); } // words 0..6 are equal here; >= (not >) so a sum exactly equal to P is reduced to 0
+}
+
+void doBatchInverse256k(unsigned int x[8]) // thin wrapper around invModP256k
 {
-    int gid = get_local_size(0) * get_group_id(0) + get_local_id(0);
-    int dim = get_global_size(0);
+    invModP256k(x); // x is replaced by the value invModP256k leaves in it
+}
+
+void beginBatchAdd256k( // multiplies (px - x) into *inverse and stores the running product in chain
+    const uint256_t px,
+    const uint256_t x,
+    __global uint256_t* chain,
+    const int i, // not read in this function
+    const int batchIdx, // selects the chain row: entry batchIdx * dim + gid is written
+    uint256_t* inverse // in/out: running product, updated in place
+) {
+    __private int gid = get_local_size(0) * get_group_id(0) + get_local_id(0); // flat index of this work-item
+    __private int dim = get_global_size(0); // row stride of chain
+
+    __private unsigned int t[8]; // receives px - x
 
     // x = Gx - x
-    uint256_t t = subModP256k(px, x);
+    subModP256k(px.v, x.v, t); // t = px - x (mod P)
 
 
     // Keep a chain of multiples of the diff, i.e. c[0] = diff0, c[1] = diff0 * diff1,
     // c[2] = diff2 * diff1 * diff0, etc
-    *inverse = mulModP256k(*inverse, t);
+    mulModP(inverse->v, t, inverse->v); // *inverse = *inverse * t
 
     chain[batchIdx * dim + gid] = *inverse;
 }
 
-
-void beginBatchAddWithDouble256k(uint256_t px, uint256_t py, __global uint256_t* xPtr, __global uint256_t* chain, int i, int batchIdx, uint256_t* inverse)
-{
-    int gid = get_local_size(0) * get_group_id(0) + get_local_id(0);
-    int dim = get_global_size(0);
-
-    uint256_t x = xPtr[i];
-
-    if(equal256k(px, x)) {
-        x = addModP256k(py, py);
+void beginBatchAddWithDouble256k( // like beginBatchAdd256k, but uses 2*py as the factor when px equals xPtr[i]
+    const uint256_t px,
+    const uint256_t py,
+    __global uint256_t* xPtr,
+    __global uint256_t* chain,
+    const int i, // index of the x value read from xPtr
+    const int batchIdx, // selects the chain row: entry batchIdx * dim + gid is written
+    uint256_t* inverse // in/out: running product, updated in place
+) {
+    __private int gid = get_local_size(0) * get_group_id(0) + get_local_id(0); // flat index of this work-item
+    __private int dim = get_global_size(0); // row stride of chain
+    __private uint256_t x = xPtr[i]; // private copy; overwritten below with the factor to multiply in
+
+    if(equal256k(px.v, x.v)) {
+        addModP256k(py.v,py.v, x.v); // x = py + py (mod P)
     } else {
         // x = Gx - x
-        x = subModP256k(px, x);
+        subModP256k(px.v, x.v, x.v); // x = px - x (mod P), written over its own input
     }
 
     // Keep a chain of multiples of the diff, i.e. c[0] = diff0, c[1] = diff0 * diff1,
     // c[2] = diff2 * diff1 * diff0, etc
-    *inverse = mulModP256k(x, *inverse);
+    mulModP(x.v, inverse->v, inverse->v); // *inverse = x * *inverse
 
     chain[batchIdx * dim + gid] = *inverse;
 }
 
+void completeBatchAdd256k( // computes newX/newY from (px, py), (xPtr[i], yPtr[i]) and the running inverse
+    const uint256_t px,
+    const uint256_t py,
+    __global uint256_t* xPtr,
+    __global uint256_t* yPtr,
+    const int i, // index of the point read from xPtr/yPtr
+    const int batchIdx, // 0 means no earlier chain entry is read
+    __global uint256_t* chain,
+    uint256_t* inverse, // in/out: multiplied by (px - x) when batchIdx != 0
+    uint256_t* newX, // out
+    uint256_t* newY) // out
+{
+    __private int gid = get_local_size(0) * get_group_id(0) + get_local_id(0); // flat index of this work-item
+    __private int dim = get_global_size(0); // row stride of chain
+    __private uint256_t x = xPtr[i];
+    __private uint256_t y = yPtr[i];
+	
+    uint256_t s; // becomes the s used in the Rx/Ry formulas below
+    __private unsigned int tmp[8]; // scratch for intermediate differences and s^2
+
+    if(batchIdx != 0) {
+        uint256_t c;
+
+        c = chain[(batchIdx - 1) * dim + gid]; // chain entry stored for the previous batch index
+        mulModP(inverse->v, c.v, s.v); // s = *inverse * c
+
+        subModP256k(px.v, x.v, tmp); // tmp = px - x
+        mulModP(tmp, inverse->v, inverse->v); // *inverse = (px - x) * *inverse
+    } else {
+        s = *inverse;
+    }
+
+	subModP256k(py.v, y.v, tmp); // tmp = py - y
+
+    mulModP(tmp, s.v, s.v); // s = (py - y) * s
+
+    // Rx = s^2 - Gx - Qx
+    mulModP(s.v, s.v, tmp); // tmp = s^2
+
+    subModP256k(tmp, px.v, newX->v); // newX = s^2 - px
+    subModP256k(newX->v, x.v, newX->v); // newX -= x
+
+    // Ry = s(px - rx) - py
+	subModP256k(px.v, newX->v, tmp); // tmp = px - newX
+    mulModP(s.v, tmp, newY->v); // newY = s * (px - newX)
+    subModP256k(newY->v, py.v, newY->v); // newY -= py
+}
 
 void completeBatchAddWithDouble256k(
-    uint256_t px,
-    uint256_t py,
+    const uint256_t px,
+    const uint256_t py,
     __global const uint256_t* xPtr,
     __global const uint256_t* yPtr,
-    int i,
-    int batchIdx,
+    const int i, // index of the point read from xPtr/yPtr
+    const int batchIdx, // values below 1 mean no earlier chain entry is read
     __global uint256_t* chain,
     uint256_t* inverse,
     uint256_t* newX,
     uint256_t* newY)
 {
-    int gid = get_local_size(0) * get_group_id(0) + get_local_id(0);
-    int dim = get_global_size(0);
-    uint256_t s;
-    uint256_t x;
-    uint256_t y;
+    __private int gid = get_local_size(0) * get_group_id(0) + get_local_id(0); // flat index of this work-item
+    __private int dim = get_global_size(0); // row stride of chain
+    __private uint256_t s; // becomes the s used in the Rx/Ry formulas below
+    __private uint256_t x;
+    __private uint256_t y;
 
     x = xPtr[i];
     y = yPtr[i];
 
     if(batchIdx >= 1) {
 
-        uint256_t c;
+        __private uint256_t c; // chain entry stored for the previous batch index
 
         c = chain[(batchIdx - 1) * dim + gid];
-        s = mulModP256k(*inverse, c);
+        mulModP(inverse->v, c.v, s.v); // s = *inverse * c
 
         uint256_t diff;
-        if(equal256k(px, x)) {
-            diff = addModP256k(py, py);
+        if(equal256k(px.v, x.v)) {
+            addModP256k(py.v, py.v, diff.v); // diff = py + py
         } else {
-            diff = subModP256k(px, x);
+            subModP256k(px.v, x.v, diff.v); // diff = px - x
         }
 
-        *inverse = mulModP256k(diff, *inverse);
+        mulModP(diff.v, inverse->v, inverse->v); // *inverse = diff * *inverse
     } else {
         s = *inverse;
     }
 
 
-    if(equal256k(px, x)) {
+    if(equal256k(px.v, x.v)) {
         // currently s = 1 / 2y
 
-        uint256_t x2;
-        uint256_t tx2;
-        uint256_t x3;
+        __private uint256_t x2;
+        __private uint256_t tx2;
 
         // 3x^2
-        x2 = mulModP256k(x, x);
-        tx2 = addModP256k(x2, x2);
-        tx2 = addModP256k(x2, tx2);
+        mulModP(x.v, x.v, x2.v); // x2 = x * x
+        addModP256k(x2.v, x2.v, tx2.v); // tx2 = x2 + x2
+        addModP256k(x2.v, tx2.v, tx2.v); // tx2 = x2 + tx2
 
         // s = 3x^2 * 1/2y
-        s = mulModP256k(tx2, s);
+        mulModP(tx2.v, s.v, s.v);
 
         // s^2
-        uint256_t s2;
-        s2 = mulModP256k(s, s);
+        __private uint256_t s2;
+        mulModP(s.v, s.v, s2.v);
 
         // Rx = s^2 - 2px
-        *newX = subModP256k(s2, x);
-        *newX = subModP256k(*newX, x);
+        subModP256k(s2.v, x.v, newX->v); // x equals px in this branch, so x is subtracted twice
+        subModP256k(newX->v, x.v, newX->v);
 
         // Ry = s(px - rx) - py
-        uint256_t k;
-        k = subModP256k(px, *newX);
-        *newY = mulModP256k(s, k);
-        *newY = subModP256k(*newY, py);
+        __private uint256_t k;
+				subModP256k(px.v, newX->v, k.v);
+        mulModP(s.v, k.v, newY->v);
+        subModP256k(newY->v, py.v,newY->v);
     } else {
 
-        uint256_t rise;
-        rise = subModP256k(py, y);
+        __private uint256_t rise;
+        subModP256k(py.v, y.v, rise.v); // rise = py - y
 
-        s = mulModP256k(rise, s);
+        mulModP(rise.v, s.v, s.v); // s = rise * s
 
         // Rx = s^2 - Gx - Qx
-        uint256_t s2;
-        s2 = mulModP256k(s, s);
+        __private uint256_t s2;
+        mulModP(s.v, s.v, s2.v);
 
-        *newX = subModP256k(s2, px);
-        *newX = subModP256k(*newX, x);
+        subModP256k(s2.v, px.v, newX->v);
+        subModP256k(newX->v, x.v,newX->v);
 
         // Ry = s(px - rx) - py
-        uint256_t k;
-        k = subModP256k(px, *newX);
-        *newY = mulModP256k(s, k);
-        *newY = subModP256k(*newY, py);
+        __private uint256_t k;
+        subModP256k(px.v, newX->v, k.v);
+        mulModP(s.v, k.v, newY->v);
+        subModP256k(newY->v, py.v, newY->v);
     }
 }
 
-
-void completeBatchAdd256k(
-    uint256_t px,
-    uint256_t py,
-    __global uint256_t* xPtr,
-    __global uint256_t* yPtr,
-    int i,
-    int batchIdx,
-    __global uint256_t* chain,
-    uint256_t* inverse,
-    uint256_t* newX,
-    uint256_t* newY)
-{
-    int gid = get_local_size(0) * get_group_id(0) + get_local_id(0);
-    int dim = get_global_size(0);
-
-    uint256_t s;
-    uint256_t x;
-
-    x = xPtr[i];
-
-    if(batchIdx >= 1) {
-        uint256_t c;
-
-        c = chain[(batchIdx - 1) * dim + gid];
-        s = mulModP256k(*inverse, c);
-
-        uint256_t diff;
-        diff = subModP256k(px, x);
-        *inverse = mulModP256k(diff, *inverse);
-    } else {
-        s = *inverse;
-    }
-
-    uint256_t y;
-    y = yPtr[i];
-
-    uint256_t rise;
-    rise = subModP256k(py, y);
-
-    s = mulModP256k(rise, s);
-
-    // Rx = s^2 - Gx - Qx
-    uint256_t s2;
-    s2 = mulModP256k(s, s);
-
-    *newX = subModP256k(s2, px);
-    *newX = subModP256k(*newX, x);
-
-    // Ry = s(px - rx) - py
-    uint256_t k;
-    k = subModP256k(px, *newX);
-    *newY = mulModP256k(s, k);
-    *newY = subModP256k(*newY, py);
-}
-
-
-uint256_t doBatchInverse256k(uint256_t x)
+unsigned int readWord256k(__global const uint256_t* ara, const int idx, const int word) // reads one 32-bit word of one array element
 {
-    return invModP256k(x);
+    return ara[idx].v[word]; // word number `word` of element `idx`; neither index is range-checked here
 }
 
 #endif
diff --git a/clMath/sha256.cl b/clMath/sha256.cl
index 7cd26ff..4c8ffd6 100644
--- a/clMath/sha256.cl
+++ b/clMath/sha256.cl
@@ -1,6 +1,5 @@
-#ifndef _SHA256_CL
-#define _SHA256_CL
-
+#ifndef SHA256_CL
+#define SHA256_CL
 
 __constant unsigned int _K[64] = {
     0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
@@ -26,7 +25,6 @@ __constant unsigned int _IV[8] = {
 
 #define rotr(x, n) ((x) >> (n)) ^ ((x) << (32 - (n)))
 
-
 #define MAJ(a, b, c) (((a) & (b)) ^ ((a) & (c)) ^ ((b) & (c)))
 
 #define CH(e, f, g) (((e) & (f)) ^ (~(e) & (g)))
@@ -35,17 +33,25 @@ __constant unsigned int _IV[8] = {
 
 #define s1(x) (rotr((x), 17) ^ rotr((x), 19) ^ ((x) >> 10))
 
-#define round(a, b, c, d, e, f, g, h, m, k)\
+#define roundSha(a, b, c, d, e, f, g, h, m, k)\
     t = CH((e), (f), (g)) + (rotr((e), 6) ^ rotr((e), 11) ^ rotr((e), 25)) + (k) + (m);\
     (d) += (t) + (h);\
     (h) += (t) + MAJ((a), (b), (c)) + (rotr((a), 2) ^ rotr((a), 13) ^ rotr((a), 22))
 
-
 void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned int digest[8])
 {
-    unsigned int a, b, c, d, e, f, g, h;
-    unsigned int w[16];
-    unsigned int t;
+    __private unsigned int a, b, c, d, e, f, g, h;
+    __private unsigned int w[16];
+    __private unsigned int t;
+
+    a = _IV[0];
+    b = _IV[1];
+    c = _IV[2];
+    d = _IV[3];
+    e = _IV[4];
+    f = _IV[5];
+    g = _IV[6];
+    h = _IV[7];
 
     // 0x04 || x || y
     w[0] = (x[0] >> 8) | 0x04000000;
@@ -65,31 +71,22 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned
     w[14] = (y[6] >> 8) | (y[5] << 24);
     w[15] = (y[7] >> 8) | (y[6] << 24);
 
-    a = _IV[0];
-    b = _IV[1];
-    c = _IV[2];
-    d = _IV[3];
-    e = _IV[4];
-    f = _IV[5];
-    g = _IV[6];
-    h = _IV[7];
-
-    round(a, b, c, d, e, f, g, h, w[0], _K[0]);
-    round(h, a, b, c, d, e, f, g, w[1], _K[1]);
-    round(g, h, a, b, c, d, e, f, w[2], _K[2]);
-    round(f, g, h, a, b, c, d, e, w[3], _K[3]);
-    round(e, f, g, h, a, b, c, d, w[4], _K[4]);
-    round(d, e, f, g, h, a, b, c, w[5], _K[5]);
-    round(c, d, e, f, g, h, a, b, w[6], _K[6]);
-    round(b, c, d, e, f, g, h, a, w[7], _K[7]);
-    round(a, b, c, d, e, f, g, h, w[8], _K[8]);
-    round(h, a, b, c, d, e, f, g, w[9], _K[9]);
-    round(g, h, a, b, c, d, e, f, w[10], _K[10]);
-    round(f, g, h, a, b, c, d, e, w[11], _K[11]);
-    round(e, f, g, h, a, b, c, d, w[12], _K[12]);
-    round(d, e, f, g, h, a, b, c, w[13], _K[13]);
-    round(c, d, e, f, g, h, a, b, w[14], _K[14]);
-    round(b, c, d, e, f, g, h, a, w[15], _K[15]);
+    roundSha(a, b, c, d, e, f, g, h, w[0], _K[0]);
+    roundSha(h, a, b, c, d, e, f, g, w[1], _K[1]);
+    roundSha(g, h, a, b, c, d, e, f, w[2], _K[2]);
+    roundSha(f, g, h, a, b, c, d, e, w[3], _K[3]);
+    roundSha(e, f, g, h, a, b, c, d, w[4], _K[4]);
+    roundSha(d, e, f, g, h, a, b, c, w[5], _K[5]);
+    roundSha(c, d, e, f, g, h, a, b, w[6], _K[6]);
+    roundSha(b, c, d, e, f, g, h, a, w[7], _K[7]);
+    roundSha(a, b, c, d, e, f, g, h, w[8], _K[8]);
+    roundSha(h, a, b, c, d, e, f, g, w[9], _K[9]);
+    roundSha(g, h, a, b, c, d, e, f, w[10], _K[10]);
+    roundSha(f, g, h, a, b, c, d, e, w[11], _K[11]);
+    roundSha(e, f, g, h, a, b, c, d, w[12], _K[12]);
+    roundSha(d, e, f, g, h, a, b, c, w[13], _K[13]);
+    roundSha(c, d, e, f, g, h, a, b, w[14], _K[14]);
+    roundSha(b, c, d, e, f, g, h, a, w[15], _K[15]);
 
     w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
     w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]);
@@ -108,22 +105,22 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned
     w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
     w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
 
-    round(a, b, c, d, e, f, g, h, w[0], _K[16]);
-    round(h, a, b, c, d, e, f, g, w[1], _K[17]);
-    round(g, h, a, b, c, d, e, f, w[2], _K[18]);
-    round(f, g, h, a, b, c, d, e, w[3], _K[19]);
-    round(e, f, g, h, a, b, c, d, w[4], _K[20]);
-    round(d, e, f, g, h, a, b, c, w[5], _K[21]);
-    round(c, d, e, f, g, h, a, b, w[6], _K[22]);
-    round(b, c, d, e, f, g, h, a, w[7], _K[23]);
-    round(a, b, c, d, e, f, g, h, w[8], _K[24]);
-    round(h, a, b, c, d, e, f, g, w[9], _K[25]);
-    round(g, h, a, b, c, d, e, f, w[10], _K[26]);
-    round(f, g, h, a, b, c, d, e, w[11], _K[27]);
-    round(e, f, g, h, a, b, c, d, w[12], _K[28]);
-    round(d, e, f, g, h, a, b, c, w[13], _K[29]);
-    round(c, d, e, f, g, h, a, b, w[14], _K[30]);
-    round(b, c, d, e, f, g, h, a, w[15], _K[31]);
+    roundSha(a, b, c, d, e, f, g, h, w[0], _K[16]);
+    roundSha(h, a, b, c, d, e, f, g, w[1], _K[17]);
+    roundSha(g, h, a, b, c, d, e, f, w[2], _K[18]);
+    roundSha(f, g, h, a, b, c, d, e, w[3], _K[19]);
+    roundSha(e, f, g, h, a, b, c, d, w[4], _K[20]);
+    roundSha(d, e, f, g, h, a, b, c, w[5], _K[21]);
+    roundSha(c, d, e, f, g, h, a, b, w[6], _K[22]);
+    roundSha(b, c, d, e, f, g, h, a, w[7], _K[23]);
+    roundSha(a, b, c, d, e, f, g, h, w[8], _K[24]);
+    roundSha(h, a, b, c, d, e, f, g, w[9], _K[25]);
+    roundSha(g, h, a, b, c, d, e, f, w[10], _K[26]);
+    roundSha(f, g, h, a, b, c, d, e, w[11], _K[27]);
+    roundSha(e, f, g, h, a, b, c, d, w[12], _K[28]);
+    roundSha(d, e, f, g, h, a, b, c, w[13], _K[29]);
+    roundSha(c, d, e, f, g, h, a, b, w[14], _K[30]);
+    roundSha(b, c, d, e, f, g, h, a, w[15], _K[31]);
 
     w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
     w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]);
@@ -142,22 +139,22 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned
     w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
     w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
 
-    round(a, b, c, d, e, f, g, h, w[0], _K[32]);
-    round(h, a, b, c, d, e, f, g, w[1], _K[33]);
-    round(g, h, a, b, c, d, e, f, w[2], _K[34]);
-    round(f, g, h, a, b, c, d, e, w[3], _K[35]);
-    round(e, f, g, h, a, b, c, d, w[4], _K[36]);
-    round(d, e, f, g, h, a, b, c, w[5], _K[37]);
-    round(c, d, e, f, g, h, a, b, w[6], _K[38]);
-    round(b, c, d, e, f, g, h, a, w[7], _K[39]);
-    round(a, b, c, d, e, f, g, h, w[8], _K[40]);
-    round(h, a, b, c, d, e, f, g, w[9], _K[41]);
-    round(g, h, a, b, c, d, e, f, w[10], _K[42]);
-    round(f, g, h, a, b, c, d, e, w[11], _K[43]);
-    round(e, f, g, h, a, b, c, d, w[12], _K[44]);
-    round(d, e, f, g, h, a, b, c, w[13], _K[45]);
-    round(c, d, e, f, g, h, a, b, w[14], _K[46]);
-    round(b, c, d, e, f, g, h, a, w[15], _K[47]);
+    roundSha(a, b, c, d, e, f, g, h, w[0], _K[32]);
+    roundSha(h, a, b, c, d, e, f, g, w[1], _K[33]);
+    roundSha(g, h, a, b, c, d, e, f, w[2], _K[34]);
+    roundSha(f, g, h, a, b, c, d, e, w[3], _K[35]);
+    roundSha(e, f, g, h, a, b, c, d, w[4], _K[36]);
+    roundSha(d, e, f, g, h, a, b, c, w[5], _K[37]);
+    roundSha(c, d, e, f, g, h, a, b, w[6], _K[38]);
+    roundSha(b, c, d, e, f, g, h, a, w[7], _K[39]);
+    roundSha(a, b, c, d, e, f, g, h, w[8], _K[40]);
+    roundSha(h, a, b, c, d, e, f, g, w[9], _K[41]);
+    roundSha(g, h, a, b, c, d, e, f, w[10], _K[42]);
+    roundSha(f, g, h, a, b, c, d, e, w[11], _K[43]);
+    roundSha(e, f, g, h, a, b, c, d, w[12], _K[44]);
+    roundSha(d, e, f, g, h, a, b, c, w[13], _K[45]);
+    roundSha(c, d, e, f, g, h, a, b, w[14], _K[46]);
+    roundSha(b, c, d, e, f, g, h, a, w[15], _K[47]);
 
     w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
     w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]);
@@ -176,22 +173,22 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned
     w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
     w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
 
-    round(a, b, c, d, e, f, g, h, w[0], _K[48]);
-    round(h, a, b, c, d, e, f, g, w[1], _K[49]);
-    round(g, h, a, b, c, d, e, f, w[2], _K[50]);
-    round(f, g, h, a, b, c, d, e, w[3], _K[51]);
-    round(e, f, g, h, a, b, c, d, w[4], _K[52]);
-    round(d, e, f, g, h, a, b, c, w[5], _K[53]);
-    round(c, d, e, f, g, h, a, b, w[6], _K[54]);
-    round(b, c, d, e, f, g, h, a, w[7], _K[55]);
-    round(a, b, c, d, e, f, g, h, w[8], _K[56]);
-    round(h, a, b, c, d, e, f, g, w[9], _K[57]);
-    round(g, h, a, b, c, d, e, f, w[10], _K[58]);
-    round(f, g, h, a, b, c, d, e, w[11], _K[59]);
-    round(e, f, g, h, a, b, c, d, w[12], _K[60]);
-    round(d, e, f, g, h, a, b, c, w[13], _K[61]);
-    round(c, d, e, f, g, h, a, b, w[14], _K[62]);
-    round(b, c, d, e, f, g, h, a, w[15], _K[63]);
+    roundSha(a, b, c, d, e, f, g, h, w[0], _K[48]);
+    roundSha(h, a, b, c, d, e, f, g, w[1], _K[49]);
+    roundSha(g, h, a, b, c, d, e, f, w[2], _K[50]);
+    roundSha(f, g, h, a, b, c, d, e, w[3], _K[51]);
+    roundSha(e, f, g, h, a, b, c, d, w[4], _K[52]);
+    roundSha(d, e, f, g, h, a, b, c, w[5], _K[53]);
+    roundSha(c, d, e, f, g, h, a, b, w[6], _K[54]);
+    roundSha(b, c, d, e, f, g, h, a, w[7], _K[55]);
+    roundSha(a, b, c, d, e, f, g, h, w[8], _K[56]);
+    roundSha(h, a, b, c, d, e, f, g, w[9], _K[57]);
+    roundSha(g, h, a, b, c, d, e, f, w[10], _K[58]);
+    roundSha(f, g, h, a, b, c, d, e, w[11], _K[59]);
+    roundSha(e, f, g, h, a, b, c, d, w[12], _K[60]);
+    roundSha(d, e, f, g, h, a, b, c, w[13], _K[61]);
+    roundSha(c, d, e, f, g, h, a, b, w[14], _K[62]);
+    roundSha(b, c, d, e, f, g, h, a, w[15], _K[63]);
 
     a += _IV[0];
     b += _IV[1];
@@ -203,35 +200,34 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned
     h += _IV[7];
 
     // store the intermediate hash value
-    unsigned int tmp[8];
-    tmp[0] = a;
-    tmp[1] = b;
-    tmp[2] = c;
-    tmp[3] = d;
-    tmp[4] = e;
-    tmp[5] = f;
-    tmp[6] = g;
-    tmp[7] = h;
+    digest[0] = a;
+    digest[1] = b;
+    digest[2] = c;
+    digest[3] = d;
+    digest[4] = e;
+    digest[5] = f;
+    digest[6] = g;
+    digest[7] = h;
 
     w[0] = (y[7] << 24) | 0x00800000;
-    w[15] = 65 * 8;
-
-    round(a, b, c, d, e, f, g, h, w[0], _K[0]);
-    round(h, a, b, c, d, e, f, g, 0, _K[1]);
-    round(g, h, a, b, c, d, e, f, 0, _K[2]);
-    round(f, g, h, a, b, c, d, e, 0, _K[3]);
-    round(e, f, g, h, a, b, c, d, 0, _K[4]);
-    round(d, e, f, g, h, a, b, c, 0, _K[5]);
-    round(c, d, e, f, g, h, a, b, 0, _K[6]);
-    round(b, c, d, e, f, g, h, a, 0, _K[7]);
-    round(a, b, c, d, e, f, g, h, 0, _K[8]);
-    round(h, a, b, c, d, e, f, g, 0, _K[9]);
-    round(g, h, a, b, c, d, e, f, 0, _K[10]);
-    round(f, g, h, a, b, c, d, e, 0, _K[11]);
-    round(e, f, g, h, a, b, c, d, 0, _K[12]);
-    round(d, e, f, g, h, a, b, c, 0, _K[13]);
-    round(c, d, e, f, g, h, a, b, 0, _K[14]);
-    round(b, c, d, e, f, g, h, a, w[15], _K[15]);
+    w[15] = 520; // 65 * 8
+
+    roundSha(a, b, c, d, e, f, g, h, w[0], _K[0]);
+    roundSha(h, a, b, c, d, e, f, g, 0, _K[1]);
+    roundSha(g, h, a, b, c, d, e, f, 0, _K[2]);
+    roundSha(f, g, h, a, b, c, d, e, 0, _K[3]);
+    roundSha(e, f, g, h, a, b, c, d, 0, _K[4]);
+    roundSha(d, e, f, g, h, a, b, c, 0, _K[5]);
+    roundSha(c, d, e, f, g, h, a, b, 0, _K[6]);
+    roundSha(b, c, d, e, f, g, h, a, 0, _K[7]);
+    roundSha(a, b, c, d, e, f, g, h, 0, _K[8]);
+    roundSha(h, a, b, c, d, e, f, g, 0, _K[9]);
+    roundSha(g, h, a, b, c, d, e, f, 0, _K[10]);
+    roundSha(f, g, h, a, b, c, d, e, 0, _K[11]);
+    roundSha(e, f, g, h, a, b, c, d, 0, _K[12]);
+    roundSha(d, e, f, g, h, a, b, c, 0, _K[13]);
+    roundSha(c, d, e, f, g, h, a, b, 0, _K[14]);
+    roundSha(b, c, d, e, f, g, h, a, w[15], _K[15]);
 
     w[0] = w[0] + s0(0) + 0 + s1(0);
     w[1] = 0 + s0(0) + 0 + s1(w[15]);
@@ -250,22 +246,22 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned
     w[14] = 0 + s0(w[15]) + w[7] + s1(w[12]);
     w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
 
-    round(a, b, c, d, e, f, g, h, w[0], _K[16]);
-    round(h, a, b, c, d, e, f, g, w[1], _K[17]);
-    round(g, h, a, b, c, d, e, f, w[2], _K[18]);
-    round(f, g, h, a, b, c, d, e, w[3], _K[19]);
-    round(e, f, g, h, a, b, c, d, w[4], _K[20]);
-    round(d, e, f, g, h, a, b, c, w[5], _K[21]);
-    round(c, d, e, f, g, h, a, b, w[6], _K[22]);
-    round(b, c, d, e, f, g, h, a, w[7], _K[23]);
-    round(a, b, c, d, e, f, g, h, w[8], _K[24]);
-    round(h, a, b, c, d, e, f, g, w[9], _K[25]);
-    round(g, h, a, b, c, d, e, f, w[10], _K[26]);
-    round(f, g, h, a, b, c, d, e, w[11], _K[27]);
-    round(e, f, g, h, a, b, c, d, w[12], _K[28]);
-    round(d, e, f, g, h, a, b, c, w[13], _K[29]);
-    round(c, d, e, f, g, h, a, b, w[14], _K[30]);
-    round(b, c, d, e, f, g, h, a, w[15], _K[31]);
+    roundSha(a, b, c, d, e, f, g, h, w[0], _K[16]);
+    roundSha(h, a, b, c, d, e, f, g, w[1], _K[17]);
+    roundSha(g, h, a, b, c, d, e, f, w[2], _K[18]);
+    roundSha(f, g, h, a, b, c, d, e, w[3], _K[19]);
+    roundSha(e, f, g, h, a, b, c, d, w[4], _K[20]);
+    roundSha(d, e, f, g, h, a, b, c, w[5], _K[21]);
+    roundSha(c, d, e, f, g, h, a, b, w[6], _K[22]);
+    roundSha(b, c, d, e, f, g, h, a, w[7], _K[23]);
+    roundSha(a, b, c, d, e, f, g, h, w[8], _K[24]);
+    roundSha(h, a, b, c, d, e, f, g, w[9], _K[25]);
+    roundSha(g, h, a, b, c, d, e, f, w[10], _K[26]);
+    roundSha(f, g, h, a, b, c, d, e, w[11], _K[27]);
+    roundSha(e, f, g, h, a, b, c, d, w[12], _K[28]);
+    roundSha(d, e, f, g, h, a, b, c, w[13], _K[29]);
+    roundSha(c, d, e, f, g, h, a, b, w[14], _K[30]);
+    roundSha(b, c, d, e, f, g, h, a, w[15], _K[31]);
 
     w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
     w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]);
@@ -284,22 +280,22 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned
     w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
     w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
 
-    round(a, b, c, d, e, f, g, h, w[0], _K[32]);
-    round(h, a, b, c, d, e, f, g, w[1], _K[33]);
-    round(g, h, a, b, c, d, e, f, w[2], _K[34]);
-    round(f, g, h, a, b, c, d, e, w[3], _K[35]);
-    round(e, f, g, h, a, b, c, d, w[4], _K[36]);
-    round(d, e, f, g, h, a, b, c, w[5], _K[37]);
-    round(c, d, e, f, g, h, a, b, w[6], _K[38]);
-    round(b, c, d, e, f, g, h, a, w[7], _K[39]);
-    round(a, b, c, d, e, f, g, h, w[8], _K[40]);
-    round(h, a, b, c, d, e, f, g, w[9], _K[41]);
-    round(g, h, a, b, c, d, e, f, w[10], _K[42]);
-    round(f, g, h, a, b, c, d, e, w[11], _K[43]);
-    round(e, f, g, h, a, b, c, d, w[12], _K[44]);
-    round(d, e, f, g, h, a, b, c, w[13], _K[45]);
-    round(c, d, e, f, g, h, a, b, w[14], _K[46]);
-    round(b, c, d, e, f, g, h, a, w[15], _K[47]);
+    roundSha(a, b, c, d, e, f, g, h, w[0], _K[32]);
+    roundSha(h, a, b, c, d, e, f, g, w[1], _K[33]);
+    roundSha(g, h, a, b, c, d, e, f, w[2], _K[34]);
+    roundSha(f, g, h, a, b, c, d, e, w[3], _K[35]);
+    roundSha(e, f, g, h, a, b, c, d, w[4], _K[36]);
+    roundSha(d, e, f, g, h, a, b, c, w[5], _K[37]);
+    roundSha(c, d, e, f, g, h, a, b, w[6], _K[38]);
+    roundSha(b, c, d, e, f, g, h, a, w[7], _K[39]);
+    roundSha(a, b, c, d, e, f, g, h, w[8], _K[40]);
+    roundSha(h, a, b, c, d, e, f, g, w[9], _K[41]);
+    roundSha(g, h, a, b, c, d, e, f, w[10], _K[42]);
+    roundSha(f, g, h, a, b, c, d, e, w[11], _K[43]);
+    roundSha(e, f, g, h, a, b, c, d, w[12], _K[44]);
+    roundSha(d, e, f, g, h, a, b, c, w[13], _K[45]);
+    roundSha(c, d, e, f, g, h, a, b, w[14], _K[46]);
+    roundSha(b, c, d, e, f, g, h, a, w[15], _K[47]);
 
     w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
     w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]);
@@ -318,38 +314,38 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned
     w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
     w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
 
-    round(a, b, c, d, e, f, g, h, w[0], _K[48]);
-    round(h, a, b, c, d, e, f, g, w[1], _K[49]);
-    round(g, h, a, b, c, d, e, f, w[2], _K[50]);
-    round(f, g, h, a, b, c, d, e, w[3], _K[51]);
-    round(e, f, g, h, a, b, c, d, w[4], _K[52]);
-    round(d, e, f, g, h, a, b, c, w[5], _K[53]);
-    round(c, d, e, f, g, h, a, b, w[6], _K[54]);
-    round(b, c, d, e, f, g, h, a, w[7], _K[55]);
-    round(a, b, c, d, e, f, g, h, w[8], _K[56]);
-    round(h, a, b, c, d, e, f, g, w[9], _K[57]);
-    round(g, h, a, b, c, d, e, f, w[10], _K[58]);
-    round(f, g, h, a, b, c, d, e, w[11], _K[59]);
-    round(e, f, g, h, a, b, c, d, w[12], _K[60]);
-    round(d, e, f, g, h, a, b, c, w[13], _K[61]);
-    round(c, d, e, f, g, h, a, b, w[14], _K[62]);
-    round(b, c, d, e, f, g, h, a, w[15], _K[63]);
-
-    digest[0] = tmp[0] + a;
-    digest[1] = tmp[1] + b;
-    digest[2] = tmp[2] + c;
-    digest[3] = tmp[3] + d;
-    digest[4] = tmp[4] + e;
-    digest[5] = tmp[5] + f;
-    digest[6] = tmp[6] + g;
-    digest[7] = tmp[7] + h;
+    roundSha(a, b, c, d, e, f, g, h, w[0], _K[48]);
+    roundSha(h, a, b, c, d, e, f, g, w[1], _K[49]);
+    roundSha(g, h, a, b, c, d, e, f, w[2], _K[50]);
+    roundSha(f, g, h, a, b, c, d, e, w[3], _K[51]);
+    roundSha(e, f, g, h, a, b, c, d, w[4], _K[52]);
+    roundSha(d, e, f, g, h, a, b, c, w[5], _K[53]);
+    roundSha(c, d, e, f, g, h, a, b, w[6], _K[54]);
+    roundSha(b, c, d, e, f, g, h, a, w[7], _K[55]);
+    roundSha(a, b, c, d, e, f, g, h, w[8], _K[56]);
+    roundSha(h, a, b, c, d, e, f, g, w[9], _K[57]);
+    roundSha(g, h, a, b, c, d, e, f, w[10], _K[58]);
+    roundSha(f, g, h, a, b, c, d, e, w[11], _K[59]);
+    roundSha(e, f, g, h, a, b, c, d, w[12], _K[60]);
+    roundSha(d, e, f, g, h, a, b, c, w[13], _K[61]);
+    roundSha(c, d, e, f, g, h, a, b, w[14], _K[62]);
+    roundSha(b, c, d, e, f, g, h, a, w[15], _K[63]);
+
+    digest[0] += a;
+    digest[1] += b;
+    digest[2] += c;
+    digest[3] += d;
+    digest[4] += e;
+    digest[5] += f;
+    digest[6] += g;
+    digest[7] += h;
 }
 
 void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, unsigned int digest[8])
 {
-    unsigned int a, b, c, d, e, f, g, h;
-    unsigned int w[16];
-    unsigned int t;
+    __private unsigned int a, b, c, d, e, f, g, h;
+    __private unsigned int w[16];
+    __private unsigned int t;
 
     // 0x03 || x  or  0x02 || x
     w[0] = 0x02000000 | ((yParity & 1) << 24) | (x[0] >> 8);
@@ -362,7 +358,7 @@ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, un
     w[6] = (x[6] >> 8) | (x[5] << 24);
     w[7] = (x[7] >> 8) | (x[6] << 24);
     w[8] = (x[7] << 24) | 0x00800000;
-    w[15] = 33 * 8;
+    w[15] = 264; // 33 * 8
 
     a = _IV[0];
     b = _IV[1];
@@ -373,22 +369,22 @@ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, un
     g = _IV[6];
     h = _IV[7];
 
-    round(a, b, c, d, e, f, g, h, w[0], _K[0]);
-    round(h, a, b, c, d, e, f, g, w[1], _K[1]);
-    round(g, h, a, b, c, d, e, f, w[2], _K[2]);
-    round(f, g, h, a, b, c, d, e, w[3], _K[3]);
-    round(e, f, g, h, a, b, c, d, w[4], _K[4]);
-    round(d, e, f, g, h, a, b, c, w[5], _K[5]);
-    round(c, d, e, f, g, h, a, b, w[6], _K[6]);
-    round(b, c, d, e, f, g, h, a, w[7], _K[7]);
-    round(a, b, c, d, e, f, g, h, w[8], _K[8]);
-    round(h, a, b, c, d, e, f, g, 0, _K[9]);
-    round(g, h, a, b, c, d, e, f, 0, _K[10]);
-    round(f, g, h, a, b, c, d, e, 0, _K[11]);
-    round(e, f, g, h, a, b, c, d, 0, _K[12]);
-    round(d, e, f, g, h, a, b, c, 0, _K[13]);
-    round(c, d, e, f, g, h, a, b, 0, _K[14]);
-    round(b, c, d, e, f, g, h, a, w[15], _K[15]);
+    roundSha(a, b, c, d, e, f, g, h, w[0], _K[0]);
+    roundSha(h, a, b, c, d, e, f, g, w[1], _K[1]);
+    roundSha(g, h, a, b, c, d, e, f, w[2], _K[2]);
+    roundSha(f, g, h, a, b, c, d, e, w[3], _K[3]);
+    roundSha(e, f, g, h, a, b, c, d, w[4], _K[4]);
+    roundSha(d, e, f, g, h, a, b, c, w[5], _K[5]);
+    roundSha(c, d, e, f, g, h, a, b, w[6], _K[6]);
+    roundSha(b, c, d, e, f, g, h, a, w[7], _K[7]);
+    roundSha(a, b, c, d, e, f, g, h, w[8], _K[8]);
+    roundSha(h, a, b, c, d, e, f, g, 0, _K[9]);
+    roundSha(g, h, a, b, c, d, e, f, 0, _K[10]);
+    roundSha(f, g, h, a, b, c, d, e, 0, _K[11]);
+    roundSha(e, f, g, h, a, b, c, d, 0, _K[12]);
+    roundSha(d, e, f, g, h, a, b, c, 0, _K[13]);
+    roundSha(c, d, e, f, g, h, a, b, 0, _K[14]);
+    roundSha(b, c, d, e, f, g, h, a, w[15], _K[15]);
 
     w[0] = w[0] + s0(w[1]) + 0 + s1(0);
     w[1] = w[1] + s0(w[2]) + 0 + s1(w[15]);
@@ -407,22 +403,22 @@ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, un
     w[14] = 0 + s0(w[15]) + w[7] + s1(w[12]);
     w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
 
-    round(a, b, c, d, e, f, g, h, w[0], _K[16]);
-    round(h, a, b, c, d, e, f, g, w[1], _K[17]);
-    round(g, h, a, b, c, d, e, f, w[2], _K[18]);
-    round(f, g, h, a, b, c, d, e, w[3], _K[19]);
-    round(e, f, g, h, a, b, c, d, w[4], _K[20]);
-    round(d, e, f, g, h, a, b, c, w[5], _K[21]);
-    round(c, d, e, f, g, h, a, b, w[6], _K[22]);
-    round(b, c, d, e, f, g, h, a, w[7], _K[23]);
-    round(a, b, c, d, e, f, g, h, w[8], _K[24]);
-    round(h, a, b, c, d, e, f, g, w[9], _K[25]);
-    round(g, h, a, b, c, d, e, f, w[10], _K[26]);
-    round(f, g, h, a, b, c, d, e, w[11], _K[27]);
-    round(e, f, g, h, a, b, c, d, w[12], _K[28]);
-    round(d, e, f, g, h, a, b, c, w[13], _K[29]);
-    round(c, d, e, f, g, h, a, b, w[14], _K[30]);
-    round(b, c, d, e, f, g, h, a, w[15], _K[31]);
+    roundSha(a, b, c, d, e, f, g, h, w[0], _K[16]);
+    roundSha(h, a, b, c, d, e, f, g, w[1], _K[17]);
+    roundSha(g, h, a, b, c, d, e, f, w[2], _K[18]);
+    roundSha(f, g, h, a, b, c, d, e, w[3], _K[19]);
+    roundSha(e, f, g, h, a, b, c, d, w[4], _K[20]);
+    roundSha(d, e, f, g, h, a, b, c, w[5], _K[21]);
+    roundSha(c, d, e, f, g, h, a, b, w[6], _K[22]);
+    roundSha(b, c, d, e, f, g, h, a, w[7], _K[23]);
+    roundSha(a, b, c, d, e, f, g, h, w[8], _K[24]);
+    roundSha(h, a, b, c, d, e, f, g, w[9], _K[25]);
+    roundSha(g, h, a, b, c, d, e, f, w[10], _K[26]);
+    roundSha(f, g, h, a, b, c, d, e, w[11], _K[27]);
+    roundSha(e, f, g, h, a, b, c, d, w[12], _K[28]);
+    roundSha(d, e, f, g, h, a, b, c, w[13], _K[29]);
+    roundSha(c, d, e, f, g, h, a, b, w[14], _K[30]);
+    roundSha(b, c, d, e, f, g, h, a, w[15], _K[31]);
 
     w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
     w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]);
@@ -441,22 +437,22 @@ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, un
     w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
     w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
 
-    round(a, b, c, d, e, f, g, h, w[0], _K[32]);
-    round(h, a, b, c, d, e, f, g, w[1], _K[33]);
-    round(g, h, a, b, c, d, e, f, w[2], _K[34]);
-    round(f, g, h, a, b, c, d, e, w[3], _K[35]);
-    round(e, f, g, h, a, b, c, d, w[4], _K[36]);
-    round(d, e, f, g, h, a, b, c, w[5], _K[37]);
-    round(c, d, e, f, g, h, a, b, w[6], _K[38]);
-    round(b, c, d, e, f, g, h, a, w[7], _K[39]);
-    round(a, b, c, d, e, f, g, h, w[8], _K[40]);
-    round(h, a, b, c, d, e, f, g, w[9], _K[41]);
-    round(g, h, a, b, c, d, e, f, w[10], _K[42]);
-    round(f, g, h, a, b, c, d, e, w[11], _K[43]);
-    round(e, f, g, h, a, b, c, d, w[12], _K[44]);
-    round(d, e, f, g, h, a, b, c, w[13], _K[45]);
-    round(c, d, e, f, g, h, a, b, w[14], _K[46]);
-    round(b, c, d, e, f, g, h, a, w[15], _K[47]);
+    roundSha(a, b, c, d, e, f, g, h, w[0], _K[32]);
+    roundSha(h, a, b, c, d, e, f, g, w[1], _K[33]);
+    roundSha(g, h, a, b, c, d, e, f, w[2], _K[34]);
+    roundSha(f, g, h, a, b, c, d, e, w[3], _K[35]);
+    roundSha(e, f, g, h, a, b, c, d, w[4], _K[36]);
+    roundSha(d, e, f, g, h, a, b, c, w[5], _K[37]);
+    roundSha(c, d, e, f, g, h, a, b, w[6], _K[38]);
+    roundSha(b, c, d, e, f, g, h, a, w[7], _K[39]);
+    roundSha(a, b, c, d, e, f, g, h, w[8], _K[40]);
+    roundSha(h, a, b, c, d, e, f, g, w[9], _K[41]);
+    roundSha(g, h, a, b, c, d, e, f, w[10], _K[42]);
+    roundSha(f, g, h, a, b, c, d, e, w[11], _K[43]);
+    roundSha(e, f, g, h, a, b, c, d, w[12], _K[44]);
+    roundSha(d, e, f, g, h, a, b, c, w[13], _K[45]);
+    roundSha(c, d, e, f, g, h, a, b, w[14], _K[46]);
+    roundSha(b, c, d, e, f, g, h, a, w[15], _K[47]);
 
 
     w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
@@ -476,39 +472,30 @@ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, un
     w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
     w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
 
-    round(a, b, c, d, e, f, g, h, w[0], _K[48]);
-    round(h, a, b, c, d, e, f, g, w[1], _K[49]);
-    round(g, h, a, b, c, d, e, f, w[2], _K[50]);
-    round(f, g, h, a, b, c, d, e, w[3], _K[51]);
-    round(e, f, g, h, a, b, c, d, w[4], _K[52]);
-    round(d, e, f, g, h, a, b, c, w[5], _K[53]);
-    round(c, d, e, f, g, h, a, b, w[6], _K[54]);
-    round(b, c, d, e, f, g, h, a, w[7], _K[55]);
-    round(a, b, c, d, e, f, g, h, w[8], _K[56]);
-    round(h, a, b, c, d, e, f, g, w[9], _K[57]);
-    round(g, h, a, b, c, d, e, f, w[10], _K[58]);
-    round(f, g, h, a, b, c, d, e, w[11], _K[59]);
-    round(e, f, g, h, a, b, c, d, w[12], _K[60]);
-    round(d, e, f, g, h, a, b, c, w[13], _K[61]);
-    round(c, d, e, f, g, h, a, b, w[14], _K[62]);
-    round(b, c, d, e, f, g, h, a, w[15], _K[63]);
-
-    a += _IV[0];
-    b += _IV[1];
-    c += _IV[2];
-    d += _IV[3];
-    e += _IV[4];
-    f += _IV[5];
-    g += _IV[6];
-    h += _IV[7];
-
-    digest[0] = a;
-    digest[1] = b;
-    digest[2] = c;
-    digest[3] = d;
-    digest[4] = e;
-    digest[5] = f;
-    digest[6] = g;
-    digest[7] = h;
+    roundSha(a, b, c, d, e, f, g, h, w[0], _K[48]);
+    roundSha(h, a, b, c, d, e, f, g, w[1], _K[49]);
+    roundSha(g, h, a, b, c, d, e, f, w[2], _K[50]);
+    roundSha(f, g, h, a, b, c, d, e, w[3], _K[51]);
+    roundSha(e, f, g, h, a, b, c, d, w[4], _K[52]);
+    roundSha(d, e, f, g, h, a, b, c, w[5], _K[53]);
+    roundSha(c, d, e, f, g, h, a, b, w[6], _K[54]);
+    roundSha(b, c, d, e, f, g, h, a, w[7], _K[55]);
+    roundSha(a, b, c, d, e, f, g, h, w[8], _K[56]);
+    roundSha(h, a, b, c, d, e, f, g, w[9], _K[57]);
+    roundSha(g, h, a, b, c, d, e, f, w[10], _K[58]);
+    roundSha(f, g, h, a, b, c, d, e, w[11], _K[59]);
+    roundSha(e, f, g, h, a, b, c, d, w[12], _K[60]);
+    roundSha(d, e, f, g, h, a, b, c, w[13], _K[61]);
+    roundSha(c, d, e, f, g, h, a, b, w[14], _K[62]);
+    roundSha(b, c, d, e, f, g, h, a, w[15], _K[63]);
+
+    digest[0] = a + _IV[0];
+    digest[1] = b + _IV[1];
+    digest[2] = c + _IV[2];
+    digest[3] = d + _IV[3];
+    digest[4] = e + _IV[4];
+    digest[5] = f + _IV[5];
+    digest[6] = g + _IV[6];
+    digest[7] = h + _IV[7];
 }
 #endif
diff --git a/clUtil/clContext.cpp b/clUtil/clContext.cpp
index 0a27518..a50eedb 100644
--- a/clUtil/clContext.cpp
+++ b/clUtil/clContext.cpp
@@ -15,7 +15,7 @@ cl::CLContext::CLContext(cl_device_id device)
     _ctx = clCreateContext(0, 1, &_device, NULL, NULL, &err);
     clCall(err);
 
-    _queue = clCreateCommandQueue(_ctx, _device, 0, &err);
+    _queue = clCreateCommandQueueWithProperties(_ctx, _device, NULL, &err);
     clCall(err);
 }
 
@@ -94,7 +94,7 @@ cl::CLProgram::CLProgram(cl::CLContext &ctx, std::string srcFile, std::string op
     cl_int err;
 
     if(util::toLower(_ctx.getDeviceVendor()).find("intel") != std::string::npos) {
-        options += "-DDEVICE_VENDOR_INTEL";
+        options += " -DDEVICE_VENDOR_INTEL";
     }
 
     _prog = clCreateProgramWithSource(ctx.getContext(), 1, &ptr, &len, &err);
@@ -246,4 +246,4 @@ size_t cl::CLKernel::getWorkGroupSize()
 cl::CLKernel::~CLKernel()
 {
     clReleaseKernel(_kernel);
-}
\ No newline at end of file
+}
diff --git a/clUtil/clContext.h b/clUtil/clContext.h
index dd8f4ec..4b28027 100644
--- a/clUtil/clContext.h
+++ b/clUtil/clContext.h
@@ -1,5 +1,5 @@
-#ifndef _CL_CONTEXT_H
-#define _CL_CONTEXT_H
+#ifndef CL_CONTEXT_H
+#define CL_CONTEXT_H
 
 #include <string>
 #include "clutil.h"
@@ -57,9 +57,6 @@ class CLProgram {
     cl_program getProgram();
 
     CLContext& getContext();
-
-    std::string getBuildLog();
-
 };
 
 
@@ -115,7 +112,7 @@ class CLKernel {
         clCall(clSetKernelArg(_kernel, 2, sizeof(arg3), &arg3));
         clCall(clSetKernelArg(_kernel, 3, sizeof(arg4), &arg4));
         clCall(clSetKernelArg(_kernel, 4, sizeof(arg5), &arg5));
-        clCall(clSetKernelArg(_kernel, 4, sizeof(arg6), &arg6));
+        clCall(clSetKernelArg(_kernel, 5, sizeof(arg6), &arg6));
     }
 
     template<typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8>
@@ -245,30 +242,6 @@ class CLKernel {
         clCall(clSetKernelArg(_kernel, 15, sizeof(T16), &arg16));
     }
 
-    template<typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8,
-        typename T9, typename T10, typename T11, typename T12, typename T13, typename T14, typename T15, typename T16,
-        typename T17>
-        void set_args(T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8, T9 arg9, T10 arg10, T11 arg11, T12 arg12,
-            T13 arg13, T14 arg14, T15 arg15, T16 arg16)
-    {
-        clCall(clSetKernelArg(_kernel, 0, sizeof(T1), &arg1));
-        clCall(clSetKernelArg(_kernel, 1, sizeof(T2), &arg2));
-        clCall(clSetKernelArg(_kernel, 2, sizeof(T3), &arg3));
-        clCall(clSetKernelArg(_kernel, 3, sizeof(T4), &arg4));
-        clCall(clSetKernelArg(_kernel, 4, sizeof(T5), &arg5));
-        clCall(clSetKernelArg(_kernel, 5, sizeof(T6), &arg6));
-        clCall(clSetKernelArg(_kernel, 6, sizeof(T7), &arg7));
-        clCall(clSetKernelArg(_kernel, 7, sizeof(T8), &arg8));
-        clCall(clSetKernelArg(_kernel, 8, sizeof(T9), &arg9));
-        clCall(clSetKernelArg(_kernel, 9, sizeof(T10), &arg10));
-        clCall(clSetKernelArg(_kernel, 10, sizeof(T11), &arg11));
-        clCall(clSetKernelArg(_kernel, 11, sizeof(T12), &arg12));
-        clCall(clSetKernelArg(_kernel, 12, sizeof(T13), &arg13));
-        clCall(clSetKernelArg(_kernel, 13, sizeof(T14), &arg14));
-        clCall(clSetKernelArg(_kernel, 14, sizeof(T15), &arg15));
-        clCall(clSetKernelArg(_kernel, 15, sizeof(T16), &arg16));
-    }
-
     template<typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8,
         typename T9, typename T10, typename T11, typename T12, typename T13, typename T14, typename T15, typename T16,
         typename T17>
@@ -330,4 +303,4 @@ class CLKernel {
 
 
 }
-#endif
\ No newline at end of file
+#endif
diff --git a/clUtil/clError.cpp b/clUtil/clError.cpp
new file mode 100644
index 0000000..c5e7f57
--- /dev/null
+++ b/clUtil/clError.cpp
@@ -0,0 +1,149 @@
+#include "clutil.h"
+
+// Returns the name of the CL_* constant matching the given OpenCL status code,
+// or "CL_UNKNOWN_ERROR_CODE" when the code is not one of the cases listed below.
+std::string cl::getOpenCLErrorName(cl_int errorCode)
+{
+	switch (errorCode)
+	{
+	case CL_SUCCESS:                                            return "CL_SUCCESS";
+	case CL_DEVICE_NOT_FOUND:                                   return "CL_DEVICE_NOT_FOUND";
+	case CL_DEVICE_NOT_AVAILABLE:                               return "CL_DEVICE_NOT_AVAILABLE";
+	case CL_COMPILER_NOT_AVAILABLE:                             return "CL_COMPILER_NOT_AVAILABLE";
+	case CL_MEM_OBJECT_ALLOCATION_FAILURE:                      return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
+	case CL_OUT_OF_RESOURCES:                                   return "CL_OUT_OF_RESOURCES";
+	case CL_OUT_OF_HOST_MEMORY:                                 return "CL_OUT_OF_HOST_MEMORY";
+	case CL_PROFILING_INFO_NOT_AVAILABLE:                       return "CL_PROFILING_INFO_NOT_AVAILABLE";
+	case CL_MEM_COPY_OVERLAP:                                   return "CL_MEM_COPY_OVERLAP";
+	case CL_IMAGE_FORMAT_MISMATCH:                              return "CL_IMAGE_FORMAT_MISMATCH";
+	case CL_IMAGE_FORMAT_NOT_SUPPORTED:                         return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
+	case CL_BUILD_PROGRAM_FAILURE:                              return "CL_BUILD_PROGRAM_FAILURE";
+	case CL_MAP_FAILURE:                                        return "CL_MAP_FAILURE";
+	case CL_MISALIGNED_SUB_BUFFER_OFFSET:                       return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
+	case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:          return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
+	case CL_COMPILE_PROGRAM_FAILURE:                            return "CL_COMPILE_PROGRAM_FAILURE";
+	case CL_LINKER_NOT_AVAILABLE:                               return "CL_LINKER_NOT_AVAILABLE";
+	case CL_LINK_PROGRAM_FAILURE:                               return "CL_LINK_PROGRAM_FAILURE";
+	case CL_DEVICE_PARTITION_FAILED:                            return "CL_DEVICE_PARTITION_FAILED";
+	case CL_KERNEL_ARG_INFO_NOT_AVAILABLE:                      return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE";
+	case CL_INVALID_VALUE:                                      return "CL_INVALID_VALUE";
+	case CL_INVALID_DEVICE_TYPE:                                return "CL_INVALID_DEVICE_TYPE";
+	case CL_INVALID_PLATFORM:                                   return "CL_INVALID_PLATFORM";
+	case CL_INVALID_DEVICE:                                     return "CL_INVALID_DEVICE";
+	case CL_INVALID_CONTEXT:                                    return "CL_INVALID_CONTEXT";
+	case CL_INVALID_QUEUE_PROPERTIES:                           return "CL_INVALID_QUEUE_PROPERTIES";
+	case CL_INVALID_COMMAND_QUEUE:                              return "CL_INVALID_COMMAND_QUEUE";
+	case CL_INVALID_HOST_PTR:                                   return "CL_INVALID_HOST_PTR";
+	case CL_INVALID_MEM_OBJECT:                                 return "CL_INVALID_MEM_OBJECT";
+	case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:                    return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
+	case CL_INVALID_IMAGE_SIZE:                                 return "CL_INVALID_IMAGE_SIZE";
+	case CL_INVALID_SAMPLER:                                    return "CL_INVALID_SAMPLER";
+	case CL_INVALID_BINARY:                                     return "CL_INVALID_BINARY";
+	case CL_INVALID_BUILD_OPTIONS:                              return "CL_INVALID_BUILD_OPTIONS";
+	case CL_INVALID_PROGRAM:                                    return "CL_INVALID_PROGRAM";
+	case CL_INVALID_PROGRAM_EXECUTABLE:                         return "CL_INVALID_PROGRAM_EXECUTABLE";
+	case CL_INVALID_KERNEL_NAME:                                return "CL_INVALID_KERNEL_NAME";
+	case CL_INVALID_KERNEL_DEFINITION:                          return "CL_INVALID_KERNEL_DEFINITION";
+	case CL_INVALID_KERNEL:                                     return "CL_INVALID_KERNEL";
+	case CL_INVALID_ARG_INDEX:                                  return "CL_INVALID_ARG_INDEX";
+	case CL_INVALID_ARG_VALUE:                                  return "CL_INVALID_ARG_VALUE";
+	case CL_INVALID_ARG_SIZE:                                   return "CL_INVALID_ARG_SIZE";
+	case CL_INVALID_KERNEL_ARGS:                                return "CL_INVALID_KERNEL_ARGS";
+	case CL_INVALID_WORK_DIMENSION:                             return "CL_INVALID_WORK_DIMENSION";
+	case CL_INVALID_WORK_GROUP_SIZE:                            return "CL_INVALID_WORK_GROUP_SIZE";
+	case CL_INVALID_WORK_ITEM_SIZE:                             return "CL_INVALID_WORK_ITEM_SIZE";
+	case CL_INVALID_GLOBAL_OFFSET:                              return "CL_INVALID_GLOBAL_OFFSET";
+	case CL_INVALID_EVENT_WAIT_LIST:                            return "CL_INVALID_EVENT_WAIT_LIST";
+	case CL_INVALID_EVENT:                                      return "CL_INVALID_EVENT";
+	case CL_INVALID_OPERATION:                                  return "CL_INVALID_OPERATION";
+	case CL_INVALID_GL_OBJECT:                                  return "CL_INVALID_GL_OBJECT";
+	case CL_INVALID_BUFFER_SIZE:                                return "CL_INVALID_BUFFER_SIZE";
+	case CL_INVALID_MIP_LEVEL:                                  return "CL_INVALID_MIP_LEVEL";
+	case CL_INVALID_GLOBAL_WORK_SIZE:                           return "CL_INVALID_GLOBAL_WORK_SIZE";
+	case CL_INVALID_PROPERTY:                                   return "CL_INVALID_PROPERTY";
+	case CL_INVALID_IMAGE_DESCRIPTOR:                           return "CL_INVALID_IMAGE_DESCRIPTOR";
+	case CL_INVALID_COMPILER_OPTIONS:                           return "CL_INVALID_COMPILER_OPTIONS";
+	case CL_INVALID_LINKER_OPTIONS:                             return "CL_INVALID_LINKER_OPTIONS";
+	case CL_INVALID_DEVICE_PARTITION_COUNT:                     return "CL_INVALID_DEVICE_PARTITION_COUNT";
+	case CL_INVALID_PIPE_SIZE:                                  return "CL_INVALID_PIPE_SIZE";
+	case CL_INVALID_DEVICE_QUEUE:                               return "CL_INVALID_DEVICE_QUEUE";
+	case CL_INVALID_SPEC_ID:                                    return "CL_INVALID_SPEC_ID";
+	case CL_MAX_SIZE_RESTRICTION_EXCEEDED:                      return "CL_MAX_SIZE_RESTRICTION_EXCEEDED";
+
+	default:
+		                                                        return "CL_UNKNOWN_ERROR_CODE";
+	}
+}
+
+// Returns a human-readable explanation of the given OpenCL status code, or
+// "No description available" when the code is not one of the cases listed below.
+// from http://www.techdarting.com/2014/01/opencl-errors.html
+std::string cl::getOpenCLErrorDescription(cl_int err) {
+	switch (err) {
+		case CL_SUCCESS:                                        return "Everything is good!";
+		case CL_DEVICE_NOT_FOUND:                               return "No OpenCL devices that matched given device type were found";
+		case CL_DEVICE_NOT_AVAILABLE:                           return "the device is currently not available even though it was returned by clGetDeviceIDs";
+		case CL_COMPILER_NOT_AVAILABLE:                         return "OpenCL Compiler perhaps failed to configure itself, or check your OpenCL installation";
+		case CL_MEM_OBJECT_ALLOCATION_FAILURE:                  return "Failed to allocate memory for buffer object";
+		case CL_OUT_OF_RESOURCES:                               return "failure to allocate resources required by the OpenCL implementation on the device";
+		case CL_OUT_OF_HOST_MEMORY:                             return "failure to allocate resources required by the OpenCL implementation on the host";
+		case CL_PROFILING_INFO_NOT_AVAILABLE:                   return "returned by clGetEventProfilingInfo, if the CL_QUEUE_PROFILING_ENABLE flag is not set for the command-queue and if the profiling information is currently not available";
+		case CL_MEM_COPY_OVERLAP:                               return "if source and destination buffers are the same buffer object and the source and destination regions overlap";
+		case CL_IMAGE_FORMAT_MISMATCH:                          return "src and dst image do not use the same image format";
+		case CL_IMAGE_FORMAT_NOT_SUPPORTED:                     return "the image format is not supported.";
+		case CL_BUILD_PROGRAM_FAILURE:                          return "program build error for given device, Use clGetProgramBuildInfo API call to get the build log of the kernel compilation.";
+		case CL_MAP_FAILURE:                                    return "failed to map the requested region into the host address space. This error does not occur for buffer objects created with CL_MEM_USE_HOST_PTR or CL_MEM_ALLOC_HOST_PTR";
+		case CL_MISALIGNED_SUB_BUFFER_OFFSET:                   return "no devices in given context associated with buffer for which the origin value is aligned to the CL_DEVICE_MEM_BASE_ADDR_ALIGN value";
+		case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:      return "returned by clWaitForEvents(), execution status of any of the events in event list is a negative integer value i.e., error";
+		case CL_COMPILE_PROGRAM_FAILURE:                        return "failed to compile the program source. Error occurs if clCompileProgram does not return until the compile has completed";
+		case CL_LINKER_NOT_AVAILABLE:                           return "Linker unavailable";
+		case CL_LINK_PROGRAM_FAILURE:                           return "failed to link the compiled binaries and perhaps libraries";
+		case CL_DEVICE_PARTITION_FAILED:                        return "given partition name is supported by the implementation but input device couldn't be partitioned further";
+		case CL_KERNEL_ARG_INFO_NOT_AVAILABLE:                  return "argument information is not available for the given kernel";
+		case CL_INVALID_VALUE:                                  return "values passed in the flags parameter is not valid";
+		case CL_INVALID_DEVICE_TYPE:                            return "device type specified is not valid, its returned by clCreateContextFromType / clGetDeviceIDs";
+		case CL_INVALID_PLATFORM:                               return "the specified platform is not a valid platform, its returned by clGetPlatformInfo /clGetDeviceIDs / clCreateContext / clCreateContextFromType";
+		case CL_INVALID_DEVICE:                                 return "device/s specified are not valid";
+		case CL_INVALID_CONTEXT:                                return "the given context is invalid OpenCL context, or the context associated with certain parameters are not the same";
+		case CL_INVALID_QUEUE_PROPERTIES:                       return "specified properties are valid but are not supported by the device, its returned by clCreateCommandQueue / clSetCommandQueueProperty";
+		case CL_INVALID_COMMAND_QUEUE:                          return "the specified command-queue is not a valid command-queue";
+		case CL_INVALID_HOST_PTR:                               return "host pointer is NULL and CL_MEM_COPY_HOST_PTR or CL_MEM_USE_HOST_PTR are set in flags or if host_ptr is not NULL but CL_MEM_COPY_HOST_PTR or CL_MEM_USE_HOST_PTR are not set in flags. returned by clCreateBuffer / clCreateImage2D / clCreateImage3D";
+		case CL_INVALID_MEM_OBJECT:                             return "the passed parameter is not a valid memory, image, or buffer object";
+		case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:                return "image format specified is not valid or is NULL, clCreateImage2D /clCreateImage3D returns this.";
+		case CL_INVALID_IMAGE_SIZE:                             return "Its returned by create Image functions 2D/3D, if specified image width or height are outbound or 0";
+		case CL_INVALID_SAMPLER:                                return "specified sampler is an invalid sampler object";
+		case CL_INVALID_BINARY:                                 return "program binary is not a valid binary for the specified device, returned by clBuildProgram / clCreateProgramWithBinary";
+		case CL_INVALID_BUILD_OPTIONS:                          return "the given build options are not valid";
+		case CL_INVALID_PROGRAM:                                return "the given program is an invalid program object, returned by clRetainProgram / clReleaseProgram / clBuildProgram / clGetProgramInfo / clGetProgramBuildInfo / clCreateKernel / clCreateKernelsInProgram";
+		case CL_INVALID_PROGRAM_EXECUTABLE:                     return "if there is no successfully built executable for program returned by clCreateKernel, there is no device in program then returned by clCreateKernelsInProgram, if no successfully built program executable present for device associated with command queue then returned by clEnqueueNDRangeKernel / clEnqueueTask";
+		case CL_INVALID_KERNEL_NAME:                            return "mentioned kernel name is not found in program";
+		case CL_INVALID_KERNEL_DEFINITION:                      return "arguments mismatch for the __kernel function definition and the passed ones, returned by clCreateKernel";
+		case CL_INVALID_KERNEL:                                 return "specified kernel is an invalid kernel object";
+		case CL_INVALID_ARG_INDEX:                              return "clSetKernelArg if an invalid argument index is specified";
+		case CL_INVALID_ARG_VALUE:                              return "the argument value specified is NULL, returned by clSetKernelArg";
+		case CL_INVALID_ARG_SIZE:                               return "the given argument size (arg_size) do not match size of the data type for an argument, returned by clSetKernelArg";
+		case CL_INVALID_KERNEL_ARGS:                            return "the kernel argument values have not been specified, returned by clEnqueueNDRangeKernel / clEnqueueTask";
+		case CL_INVALID_WORK_DIMENSION:                         return "given work dimension is an invalid value, returned by clEnqueueNDRangeKernel";
+		case CL_INVALID_WORK_GROUP_SIZE:                        return "the specified local workgroup size and number of workitems specified by global workgroup size is not evenly divisible by local workgroup size";
+		case CL_INVALID_WORK_ITEM_SIZE:                         return "no. of workitems specified in any of local work group sizes is greater than the corresponding values specified by CL_DEVICE_MAX_WORK_ITEM_SIZES in that particular dimension";
+		case CL_INVALID_GLOBAL_OFFSET:                          return "global_work_offset is not NULL. Must currently be a NULL value. In a future revision of OpenCL, global_work_offset can be used but not until OCL 1.2";
+		case CL_INVALID_EVENT_WAIT_LIST:                        return "event wait list is NULL and (no. of events in wait list > 0), or event wait list is not NULL and no. of events in wait list is 0, or specified event objects are not valid events";
+		case CL_INVALID_EVENT:                                  return "invalid event objects specified";
+		case CL_INVALID_OPERATION:                              return "the requested operation is not valid for the given object, device or context state";
+		case CL_INVALID_GL_OBJECT:                              return "not a valid GL buffer object";
+		case CL_INVALID_BUFFER_SIZE:                            return "the value of the parameter size is 0 or exceeds CL_DEVICE_MAX_MEM_ALLOC_SIZE for all devices specified in the parameter context, returned by clCreateBuffer";
+		case CL_INVALID_MIP_LEVEL:                              return "the specified miplevel is not valid for the given GL texture, returned by clCreateFromGLTexture";
+		case CL_INVALID_GLOBAL_WORK_SIZE:                       return "specified global work size is NULL, or any of the values specified in global work dimensions are 0 or exceeds the range given by the sizeof(size_t) for the device on which the kernel will be enqueued, returned by clEnqueueNDRangeKernel";
+		case CL_INVALID_PROPERTY:                               return "context property name in properties is not a supported property name, returned by clCreateContext";
+		case CL_INVALID_IMAGE_DESCRIPTOR:                       return "values specified in image description are invalid";
+		case CL_INVALID_COMPILER_OPTIONS:                       return "compiler options specified by options are invalid, returned by clCompileProgram";
+		case CL_INVALID_LINKER_OPTIONS:                         return "linker options specified by options are invalid, returned by clLinkProgram";
+		case CL_INVALID_DEVICE_PARTITION_COUNT:                 return "partition name specified in properties is CL_DEVICE_PARTITION_BY_COUNTS and the number of sub-devices requested exceeds CL_DEVICE_PARTITION_MAX_SUB_DEVICES or the total number of compute units requested exceeds CL_DEVICE_PARTITION_MAX_COMPUTE_UNITS for in_device, or the number of compute units requested for one or more sub-devices is less than zero or the number of sub-devices requested exceeds CL_DEVICE_PARTITION_MAX_COMPUTE_UNITS for in_device";
+		case CL_INVALID_PIPE_SIZE:                              return "pipe_packet_size is 0 or the pipe_packet_size exceeds CL_DEVICE_PIPE_MAX_PACKET_SIZE value specified in table 4.3 (see clGetDeviceInfo) for all devices in context or pipe_max_packets is 0";
+		case CL_INVALID_DEVICE_QUEUE:                           return "an argument declared as queue_t was given a value that is not a valid device queue object, returned by clSetKernelArg";
+		case CL_INVALID_SPEC_ID:                                return "spec_id is not a valid specialization constant identifier";
+		case CL_MAX_SIZE_RESTRICTION_EXCEEDED:                  return "the size in bytes of the memory object (if the argument is a memory object) or arg_size (if the argument is declared with local qualifier) exceeds a language- specified maximum size restriction for this argument, such as the MaxByteOffset SPIR-V decoration";
+
+		default: return "No description available";
+	}
+}
diff --git a/clUtil/clUtil.cpp b/clUtil/clUtil.cpp
index 02b3835..0f16152 100644
--- a/clUtil/clUtil.cpp
+++ b/clUtil/clUtil.cpp
@@ -1,6 +1,5 @@
 #include "clutil.h"
 
-
 void cl::clCall(cl_int err)
 {
     if(err != CL_SUCCESS) {
@@ -8,7 +7,6 @@ void cl::clCall(cl_int err)
     }
 }
 
-
 std::vector<cl::CLDeviceInfo> cl::getDevices()
 {
     std::vector<cl::CLDeviceInfo> deviceList;
@@ -52,6 +50,10 @@ std::vector<cl::CLDeviceInfo> cl::getDevices()
 
             info.cores = cores;
 
+            size_t maxWorkingGroupSize = 0;
+            clCall(clGetDeviceInfo(devices[j], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &maxWorkingGroupSize, NULL));
+            info.maxWorkingGroupSize = maxWorkingGroupSize;
+
             cl_ulong mem;
             clCall(clGetDeviceInfo(devices[j], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(mem), &mem, NULL));
 
@@ -60,10 +62,10 @@ std::vector<cl::CLDeviceInfo> cl::getDevices()
             deviceList.push_back(info);
         }
 
-        delete devices;
+        delete[] devices;
     }
 
-    delete platforms;
+    delete[] platforms;
 
     return deviceList;
-}
\ No newline at end of file
+}
diff --git a/clUtil/clUtil.vcxproj b/clUtil/clUtil.vcxproj
index 4a66f61..055f977 100644
--- a/clUtil/clUtil.vcxproj
+++ b/clUtil/clUtil.vcxproj
@@ -5,6 +5,14 @@
       <Configuration>Debug</Configuration>
       <Platform>Win32</Platform>
     </ProjectConfiguration>
+    <ProjectConfiguration Include="Performance Release|Win32">
+      <Configuration>Performance Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Performance Release|x64">
+      <Configuration>Performance Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
     <ProjectConfiguration Include="Release|Win32">
       <Configuration>Release</Configuration>
       <Platform>Win32</Platform>
@@ -28,26 +36,40 @@
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
+    <PlatformToolset>ClangCl</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>ClangCl</PlatformToolset>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
+    <PlatformToolset>ClangCl</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v142</PlatformToolset>
+    <PlatformToolset>ClangCl</PlatformToolset>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>ClangCl</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PreferredToolArchitecture>x64</PreferredToolArchitecture>
+  </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v142</PlatformToolset>
+    <PlatformToolset>ClangCL</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
@@ -60,6 +82,10 @@
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
     <Import Project="..\BitCrack.props" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="..\BitCrack.props" />
+  </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
     <Import Project="..\BitCrack.props" />
@@ -68,6 +94,10 @@
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
     <Import Project="..\BitCrack.props" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="..\BitCrack.props" />
+  </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
     <Import Project="..\BitCrack.props" />
@@ -98,15 +128,40 @@
       <AdditionalIncludeDirectories>C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.2\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
     </ClCompile>
   </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|Win32'">
     <ClCompile>
       <WarningLevel>Level3</WarningLevel>
       <Optimization>Disabled</Optimization>
       <SDLCheck>true</SDLCheck>
       <ConformanceMode>true</ConformanceMode>
+      <AdditionalIncludeDirectories>C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.2\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <WarningLevel>EnableAllWarnings</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <SDLCheck>true</SDLCheck>
+      <ConformanceMode>true</ConformanceMode>
       <AdditionalIncludeDirectories>$(OPENCL_INCLUDE);$(SolutionDir)\util;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
     </ClCompile>
   </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <SDLCheck>true</SDLCheck>
+      <ConformanceMode>true</ConformanceMode>
+      <AdditionalIncludeDirectories>$(OPENCL_INCLUDE);$(SolutionDir)\util;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <DebugInformationFormat>None</DebugInformationFormat>
+      <Optimization>MaxSpeed</Optimization>
+      <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+    </ClCompile>
+  </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <ClCompile>
       <WarningLevel>Level3</WarningLevel>
@@ -124,7 +179,7 @@
   </ItemDefinitionGroup>
   <ItemGroup>
     <ClCompile Include="clContext.cpp" />
-    <ClCompile Include="clerrors.cpp" />
+    <ClCompile Include="clError.cpp" />
     <ClCompile Include="clUtil.cpp" />
   </ItemGroup>
   <ItemGroup>
diff --git a/clUtil/clerrors.cpp b/clUtil/clerrors.cpp
deleted file mode 100644
index 56b1542..0000000
--- a/clUtil/clerrors.cpp
+++ /dev/null
@@ -1,69 +0,0 @@
-#include "clutil.h"
-
-
-std::string cl::getErrorString(cl_int err)
-{
-    switch(err) {
-    case 0: return "CL_SUCCESS";
-    case -1: return "CL_DEVICE_NOT_FOUND";
-    case -2: return "CL_DEVICE_NOT_AVAILABLE";
-    case -3: return "CL_COMPILER_NOT_AVAILABLE";
-    case -4: return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
-    case -5: return "CL_OUT_OF_RESOURCES";
-    case -6: return "CL_OUT_OF_HOST_MEMORY";
-    case -7: return "CL_PROFILING_INFO_NOT_AVAILABLE";
-    case -8: return "CL_MEM_COPY_OVERLAP";
-    case -9: return "CL_IMAGE_FORMAT_MISMATCH";
-    case -10: return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
-    case -11: return "CL_BUILD_PROGRAM_FAILURE";
-    case -12: return "CL_MAP_FAILURE";
-    case -13: return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
-    case -14: return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
-    case -15: return "CL_COMPILE_PROGRAM_FAILURE";
-    case -16: return "CL_LINKER_NOT_AVAILABLE";
-    case -17: return "CL_LINK_PROGRAM_FAILURE";
-    case -18: return "CL_DEVICE_PARTITION_FAILED";
-    case -19: return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE";
-    case -30: return "CL_INVALID_VALUE";
-    case -31: return "CL_INVALID_DEVICE_TYPE";
-    case -32: return "CL_INVALID_PLATFORM";
-    case -33: return "CL_INVALID_DEVICE";
-    case -34: return "CL_INVALID_CONTEXT";
-    case -35: return "CL_INVALID_QUEUE_PROPERTIES";
-    case -36: return "CL_INVALID_COMMAND_QUEUE";
-    case -37: return "CL_INVALID_HOST_PTR";
-    case -38: return "CL_INVALID_MEM_OBJECT";
-    case -39: return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
-    case -40: return "CL_INVALID_IMAGE_SIZE";
-    case -41: return "CL_INVALID_SAMPLER";
-    case -42: return "CL_INVALID_BINARY";
-    case -43: return "CL_INVALID_BUILD_OPTIONS";
-    case -44: return "CL_INVALID_PROGRAM";
-    case -45: return "CL_INVALID_PROGRAM_EXECUTABLE";
-    case -46: return "CL_INVALID_KERNEL_NAME";
-    case -47: return "CL_INVALID_KERNEL_DEFINITION";
-    case -48: return "CL_INVALID_KERNEL";
-    case -49: return "CL_INVALID_ARG_INDEX";
-    case -50: return "CL_INVALID_ARG_VALUE";
-    case -51: return "CL_INVALID_ARG_SIZE";
-    case -52: return "CL_INVALID_KERNEL_ARGS";
-    case -53: return "CL_INVALID_WORK_DIMENSION";
-    case -54: return "CL_INVALID_WORK_GROUP_SIZE";
-    case -55: return "CL_INVALID_WORK_ITEM_SIZE";
-    case -56: return "CL_INVALID_GLOBAL_OFFSET";
-    case -57: return "CL_INVALID_EVENT_WAIT_LIST";
-    case -58: return "CL_INVALID_EVENT";
-    case -59: return "CL_INVALID_OPERATION";
-    case -60: return "CL_INVALID_GL_OBJECT";
-    case -61: return "CL_INVALID_BUFFER_SIZE";
-    case -62: return "CL_INVALID_MIP_LEVEL";
-    case -63: return "CL_INVALID_GLOBAL_WORK_SIZE";
-    case -64: return "CL_INVALID_PROPERTY";
-    case -65: return "CL_INVALID_IMAGE_DESCRIPTOR";
-    case -66: return "CL_INVALID_COMPILER_OPTIONS";
-    case -67: return "CL_INVALID_LINKER_OPTIONS";
-    case -68: return "CL_INVALID_DEVICE_PARTITION_COUNT";
-    default: return "CL_UNKNOWN_ERROR";
-    }
-
-}
\ No newline at end of file
diff --git a/clUtil/clutil.h b/clUtil/clutil.h
index 3559145..617a2b5 100644
--- a/clUtil/clutil.h
+++ b/clUtil/clutil.h
@@ -1,5 +1,5 @@
-#ifndef _CL_UTIL_H
-#define _CL_UTIL_H
+#ifndef CL_UTIL_H
+#define CL_UTIL_H
 
 #ifdef __APPLE__
 #define CL_SILENCE_DEPRECATION
@@ -12,31 +12,42 @@
 #include <vector>
 
 namespace cl {
-    std::string getErrorString(cl_int err);
+    std::string getOpenCLErrorName(cl_int errorCode);
+    std::string getOpenCLErrorDescription(cl_int errorCode);
 
     typedef struct {
         cl_device_id id;
         int cores;
         uint64_t mem;
         std::string name;
-
+        size_t maxWorkingGroupSize;
     }CLDeviceInfo;
 
     class CLException {
     public:
         int error;
         std::string msg;
+        std::string description;
+
+        CLException(cl_int errorCode)
+        {
+            this->error = errorCode;
+            this->msg = getOpenCLErrorName(errorCode);
+            this->description = getOpenCLErrorDescription(errorCode);
+        }
 
-        CLException(cl_int err)
+        CLException(cl_int errorCode, std::string pMsg)
         {
-            this->error = err;
-            this->msg = getErrorString(err);
+            this->error = errorCode;
+            this->msg = pMsg;
+            this->description = getOpenCLErrorDescription(errorCode);
         }
 
-        CLException(cl_int err, std::string msg)
+        CLException(cl_int errorCode, std::string pMsg, std::string pDescription)
         {
-            this->error = err;
-            this->msg = msg;
+            this->error = errorCode;
+            this->msg = pMsg;
+            this->description = pDescription;
         }
     };
 
@@ -44,10 +55,8 @@ namespace cl {
 
     std::vector<CLDeviceInfo> getDevices();
 
-    int getDeviceCount();
-
     void clCall(cl_int err);
 
 }
 
-#endif
\ No newline at end of file
+#endif
diff --git a/cudaInfo/Makefile b/cudaInfo/Makefile
deleted file mode 100644
index 8c81055..0000000
--- a/cudaInfo/Makefile
+++ /dev/null
@@ -1,9 +0,0 @@
-CPPSRC:=$(wildcard *.cpp)
-
-all:
-	${CXX} -o cudainfo.bin ${CPPSRC} ${INCLUDE} -I${CUDA_INCLUDE} ${CXXFLAGS} ${LIBS} -L${CUDA_LIB} -lcudautil -lcudart 
-	mkdir -p $(BINDIR)
-	cp cudainfo.bin $(BINDIR)/cudainfo
-
-clean:
-	rm -rf cudainfo.bin
\ No newline at end of file
diff --git a/cudaInfo/cudaInfo.vcxproj b/cudaInfo/cudaInfo.vcxproj
deleted file mode 100644
index 6636506..0000000
--- a/cudaInfo/cudaInfo.vcxproj
+++ /dev/null
@@ -1,94 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <ItemGroup Label="ProjectConfigurations">
-    <ProjectConfiguration Include="Debug|x64">
-      <Configuration>Debug</Configuration>
-      <Platform>x64</Platform>
-    </ProjectConfiguration>
-    <ProjectConfiguration Include="Release|x64">
-      <Configuration>Release</Configuration>
-      <Platform>x64</Platform>
-    </ProjectConfiguration>
-  </ItemGroup>
-  <ItemGroup>
-    <ClCompile Include="main.cpp" />
-  </ItemGroup>
-  <ItemGroup>
-    <ProjectReference Include="..\cudaUtil\cudaUtil.vcxproj">
-      <Project>{eadaaa54-e304-4656-8263-e5e688ff323d}</Project>
-    </ProjectReference>
-  </ItemGroup>
-  <PropertyGroup Label="Globals">
-    <ProjectGuid>{9E8ECC85-AF9F-4F17-9397-633CA2FEE94E}</ProjectGuid>
-    <RootNamespace>cudaInfo</RootNamespace>
-    <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
-  </PropertyGroup>
-  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
-    <ConfigurationType>Application</ConfigurationType>
-    <UseDebugLibraries>true</UseDebugLibraries>
-    <CharacterSet>MultiByte</CharacterSet>
-    <PlatformToolset>v142</PlatformToolset>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
-    <ConfigurationType>Application</ConfigurationType>
-    <UseDebugLibraries>false</UseDebugLibraries>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
-    <CharacterSet>MultiByte</CharacterSet>
-    <PlatformToolset>v142</PlatformToolset>
-  </PropertyGroup>
-  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
-  <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 10.1.props" />
-  </ImportGroup>
-  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-  </ImportGroup>
-  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-  </ImportGroup>
-  <PropertyGroup Label="UserMacros" />
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <LinkIncremental>true</LinkIncremental>
-  </PropertyGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <ClCompile>
-      <WarningLevel>Level3</WarningLevel>
-      <Optimization>Disabled</Optimization>
-      <PreprocessorDefinitions>WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <AdditionalIncludeDirectories>$(SolutionDir)cudaUtil;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-    </ClCompile>
-    <Link>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-      <SubSystem>Console</SubSystem>
-      <AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
-    </Link>
-    <CudaCompile>
-      <TargetMachinePlatform>64</TargetMachinePlatform>
-    </CudaCompile>
-  </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
-    <ClCompile>
-      <WarningLevel>Level3</WarningLevel>
-      <Optimization>MaxSpeed</Optimization>
-      <FunctionLevelLinking>true</FunctionLevelLinking>
-      <IntrinsicFunctions>true</IntrinsicFunctions>
-      <PreprocessorDefinitions>WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <AdditionalIncludeDirectories>$(SolutionDir)cudaUtil;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-    </ClCompile>
-    <Link>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-      <EnableCOMDATFolding>true</EnableCOMDATFolding>
-      <OptimizeReferences>true</OptimizeReferences>
-      <SubSystem>Console</SubSystem>
-      <AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
-    </Link>
-    <CudaCompile>
-      <TargetMachinePlatform>64</TargetMachinePlatform>
-    </CudaCompile>
-  </ItemDefinitionGroup>
-  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
-  <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 10.1.targets" />
-  </ImportGroup>
-</Project>
\ No newline at end of file
diff --git a/cudaInfo/main.cpp b/cudaInfo/main.cpp
deleted file mode 100644
index 77d1fec..0000000
--- a/cudaInfo/main.cpp
+++ /dev/null
@@ -1,34 +0,0 @@
-#include <stdio.h>
-#include <vector>
-
-#include"cudaUtil.h"
-
-void printDeviceInfo(const cuda::CudaDeviceInfo &info)
-{
-	printf("ID:          %d\n", info.id);
-	printf("Name:        %s\n", info.name.c_str());
-	printf("Capability:  %d.%d\n", info.major, info.minor);
-	printf("MP:          %d\n", info.mpCount);
-	printf("Cores:       %d (%d per MP)\n", info.mpCount * info.cores, info.cores);
-	printf("Memory:      %dMB\n", (int)(info.mem / (1024 * 1024)));
-}
-
-int main(int argc, char **argv)
-{
-	try {
-		std::vector<cuda::CudaDeviceInfo> devices = cuda::getDevices();
-
-		printf("Found %d devices\n\n", (int)devices.size());
-
-		for(int i = 0; i < (int)devices.size(); i++) {
-			printDeviceInfo(devices[i]);
-			printf("\n");
-		}
-	} catch(cuda::CudaException &ex) {
-		printf("Error querying devices: %s\n", ex.msg.c_str());
-
-		return 1;
-	}
-
-	return 0;
-}
\ No newline at end of file
diff --git a/cudaMath/cudaMath.vcxproj b/cudaMath/cudaMath.vcxproj
deleted file mode 100644
index fcf4097..0000000
--- a/cudaMath/cudaMath.vcxproj
+++ /dev/null
@@ -1,90 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <ItemGroup Label="ProjectConfigurations">
-    <ProjectConfiguration Include="Debug|x64">
-      <Configuration>Debug</Configuration>
-      <Platform>x64</Platform>
-    </ProjectConfiguration>
-    <ProjectConfiguration Include="Release|x64">
-      <Configuration>Release</Configuration>
-      <Platform>x64</Platform>
-    </ProjectConfiguration>
-  </ItemGroup>
-  <PropertyGroup Label="Globals">
-    <ProjectGuid>{E1BDB205-8994-4E49-8B35-172A84E7118C}</ProjectGuid>
-    <RootNamespace>cudaMath</RootNamespace>
-    <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
-  </PropertyGroup>
-  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
-    <ConfigurationType>Application</ConfigurationType>
-    <UseDebugLibraries>true</UseDebugLibraries>
-    <CharacterSet>MultiByte</CharacterSet>
-    <PlatformToolset>v142</PlatformToolset>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
-    <ConfigurationType>Application</ConfigurationType>
-    <UseDebugLibraries>false</UseDebugLibraries>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
-    <CharacterSet>MultiByte</CharacterSet>
-    <PlatformToolset>v142</PlatformToolset>
-  </PropertyGroup>
-  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
-  <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 10.1.props" />
-  </ImportGroup>
-  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-  </ImportGroup>
-  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-  </ImportGroup>
-  <PropertyGroup Label="UserMacros" />
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <LinkIncremental>true</LinkIncremental>
-  </PropertyGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <ClCompile>
-      <WarningLevel>Level3</WarningLevel>
-      <Optimization>Disabled</Optimization>
-      <PreprocessorDefinitions>WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-    </ClCompile>
-    <Link>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-      <SubSystem>Console</SubSystem>
-      <AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
-    </Link>
-    <CudaCompile>
-      <TargetMachinePlatform>64</TargetMachinePlatform>
-    </CudaCompile>
-  </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
-    <ClCompile>
-      <WarningLevel>Level3</WarningLevel>
-      <Optimization>MaxSpeed</Optimization>
-      <FunctionLevelLinking>true</FunctionLevelLinking>
-      <IntrinsicFunctions>true</IntrinsicFunctions>
-      <PreprocessorDefinitions>WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-    </ClCompile>
-    <Link>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-      <EnableCOMDATFolding>true</EnableCOMDATFolding>
-      <OptimizeReferences>true</OptimizeReferences>
-      <SubSystem>Console</SubSystem>
-      <AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
-    </Link>
-    <CudaCompile>
-      <TargetMachinePlatform>64</TargetMachinePlatform>
-    </CudaCompile>
-  </ItemDefinitionGroup>
-  <ItemGroup>
-    <ClInclude Include="ptx.cuh" />
-    <ClInclude Include="ripemd160.cuh" />
-    <ClInclude Include="secp256k1.cuh" />
-    <ClInclude Include="sha256.cuh" />
-  </ItemGroup>
-  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
-  <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 10.1.targets" />
-  </ImportGroup>
-</Project>
\ No newline at end of file
diff --git a/cudaMath/ptx.cuh b/cudaMath/ptx.cuh
deleted file mode 100644
index 0bbcffc..0000000
--- a/cudaMath/ptx.cuh
+++ /dev/null
@@ -1,32 +0,0 @@
-#ifndef _PTX_H
-#define _PTX_H
-
-#include<cuda_runtime.h>
-
-#define madc_hi(dest, a, x, b) asm volatile("madc.hi.u32 %0, %1, %2, %3;\n\t" : "=r"(dest) : "r"(a), "r"(x), "r"(b))
-#define madc_hi_cc(dest, a, x, b) asm volatile("madc.hi.cc.u32 %0, %1, %2, %3;\n\t" : "=r"(dest) : "r"(a), "r"(x), "r"(b))
-#define mad_hi_cc(dest, a, x, b) asm volatile("mad.hi.cc.u32 %0, %1, %2, %3;\n\t" : "=r"(dest) : "r"(a), "r"(x), "r"(b))
-
-#define mad_lo_cc(dest, a, x, b) asm volatile("mad.lo.cc.u32 %0, %1, %2, %3;\n\t" : "=r"(dest) : "r"(a), "r"(x), "r"(b))
-#define madc_lo(dest, a, x, b) asm volatile("madc.lo.u32 %0, %1, %2, %3;\n\t" : "=r"(dest) : "r"(a), "r"(x), "r"(b))
-#define madc_lo_cc(dest, a, x, b) asm volatile("madc.lo.cc.u32 %0, %1, %2, %3;\n\t" : "=r"(dest) : "r"(a), "r"(x),"r"(b))
-
-#define addc(dest, a, b) asm volatile("addc.u32 %0, %1, %2;\n\t" : "=r"(dest) : "r"(a), "r"(b))
-#define add_cc(dest, a, b) asm volatile("add.cc.u32 %0, %1, %2;\n\t" : "=r"(dest) : "r"(a), "r"(b))
-#define addc_cc(dest, a, b) asm volatile("addc.cc.u32 %0, %1, %2;\n\t" : "=r"(dest) : "r"(a), "r"(b))
-
-#define sub_cc(dest, a, b) asm volatile("sub.cc.u32 %0, %1, %2;\n\t" : "=r"(dest) : "r"(a), "r"(b))
-#define subc_cc(dest, a, b) asm volatile("subc.cc.u32 %0, %1, %2;\n\t" : "=r"(dest) : "r"(a), "r"(b))
-#define subc(dest, a, b) asm volatile("subc.u32 %0, %1, %2;\n\t" : "=r"(dest) : "r"(a), "r"(b))
-
-#define set_eq(dest,a,b) asm volatile("set.eq.u32.u32 %0, %1, %2;\n\t" : "=r"(dest) : "r"(a), "r"(b))
-
-#define lsbpos(x) (__ffs((x)))
-
-
-__device__ __forceinline__ unsigned int endian(unsigned int x)
-{
-	return (x << 24) | ((x << 8) & 0x00ff0000) | ((x >> 8) & 0x0000ff00) | (x >> 24);
-}
-
-#endif
\ No newline at end of file
diff --git a/cudaMath/ripemd160.cuh b/cudaMath/ripemd160.cuh
deleted file mode 100644
index 47590bb..0000000
--- a/cudaMath/ripemd160.cuh
+++ /dev/null
@@ -1,539 +0,0 @@
-#ifndef _RIPEMD160_CUH
-#define _RIPEMD160_CUH
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <device_launch_parameters.h>
-#include "ptx.cuh"
-
-__constant__ unsigned int _RIPEMD160_IV[5] = {
-	0x67452301,
-	0xefcdab89,
-	0x98badcfe,
-	0x10325476,
-	0xc3d2e1f0
-};
-
-__constant__ unsigned int _K0 = 0x5a827999;
-__constant__ unsigned int _K1 = 0x6ed9eba1;
-__constant__ unsigned int _K2 = 0x8f1bbcdc;
-__constant__ unsigned int _K3 = 0xa953fd4e;
-
-__constant__ unsigned int _K4 = 0x7a6d76e9;
-__constant__ unsigned int _K5 = 0x6d703ef3;
-__constant__ unsigned int _K6 = 0x5c4dd124;
-__constant__ unsigned int _K7 = 0x50a28be6;
-
-
-__device__ __forceinline__ unsigned int rotl(unsigned int x, int n)
-{
-	return (x << n) | (x >> (32 - n));
-}
-
-__device__ __forceinline__ unsigned int F(unsigned int x, unsigned int y, unsigned int z)
-{
-	return x ^ y ^ z;
-}
-
-__device__ __forceinline__ unsigned int G(unsigned int x, unsigned int y, unsigned int z)
-{
-	return (((x) & (y)) | (~(x) & (z)));
-}
-
-__device__ __forceinline__ unsigned int H(unsigned int x, unsigned int y, unsigned int z)
-{
-	return (((x) | ~(y)) ^ (z));
-}
-
-__device__ __forceinline__ unsigned int I(unsigned int x, unsigned int y, unsigned int z)
-{
-	return (((x) & (z)) | ((y) & ~(z)));
-}
-
-__device__ __forceinline__ unsigned int J(unsigned int x, unsigned int y, unsigned int z)
-{
-	return  ((x) ^ ((y) | ~(z)));
-}
-
-__device__ __forceinline__ void FF(unsigned int &a, unsigned int &b, unsigned int &c, unsigned int &d, unsigned int &e, unsigned int x, unsigned int s)
-{
-	a += F(b, c, d) + x;
-	a = rotl(a, s) + e;
-	c = rotl(c, 10);
-}
-
-__device__ __forceinline__ void GG(unsigned int &a, unsigned int &b, unsigned int &c, unsigned int &d, unsigned int &e, unsigned int x, unsigned int s)
-{
-	a += G(b, c, d) + x + _K0;
-	a = rotl(a, s) + e;
-	c = rotl(c, 10);
-}
-
-__device__ __forceinline__ void HH(unsigned int &a, unsigned int &b, unsigned int &c, unsigned int &d, unsigned int &e, unsigned int x, unsigned int s)
-{
-	a += H(b, c, d) + x + _K1;
-	a = rotl(a, s) + e;
-	c = rotl(c, 10);
-}
-
-__device__ __forceinline__ void II(unsigned int &a, unsigned int &b, unsigned int &c, unsigned int &d, unsigned int &e, unsigned int x, unsigned int s)
-{
-	a += I(b, c, d) + x + _K2;
-	a = rotl(a, s) + e;
-	c = rotl(c, 10);
-}
-
-__device__ __forceinline__ void JJ(unsigned int &a, unsigned int &b, unsigned int &c, unsigned int &d, unsigned int &e, unsigned int x, unsigned int s)
-{
-	a += J(b, c, d) + x + _K3;
-	a = rotl(a, s) + e;
-	c = rotl(c, 10);
-}
-
-__device__ __forceinline__ void FFF(unsigned int &a, unsigned int &b, unsigned int &c, unsigned int &d, unsigned int &e, unsigned int x, unsigned int s)
-{
-	a += F(b, c, d) + x;
-	a = rotl(a, s) + e;
-	c = rotl(c, 10);
-}
-
-__device__ __forceinline__ void GGG(unsigned int &a, unsigned int &b, unsigned int &c, unsigned int &d, unsigned int &e, unsigned int x, unsigned int s)
-{
-	a += G(b, c, d) + x + _K4;
-	a = rotl(a, s) + e;
-	c = rotl(c, 10);
-}
-
-__device__ __forceinline__ void HHH(unsigned int &a, unsigned int &b, unsigned int &c, unsigned int &d, unsigned int &e, unsigned int x, unsigned int s)
-{
-	a += H(b, c, d) + x + _K5;
-	a = rotl(a, s) + e;
-	c = rotl(c, 10);
-}
-
-__device__ __forceinline__ void III(unsigned int &a, unsigned int &b, unsigned int &c, unsigned int &d, unsigned int &e, unsigned int x, unsigned int s)
-{
-	a += I(b, c, d) + x + _K6;
-	a = rotl(a, s) + e;
-	c = rotl(c, 10);
-}
-
-__device__ __forceinline__ void JJJ(unsigned int &a, unsigned int &b, unsigned int &c, unsigned int &d, unsigned int &e, unsigned int x, unsigned int s)
-{
-	a += J(b, c, d) + x + _K7;
-	a = rotl(a, s) + e;
-	c = rotl(c, 10);
-}
-
-
-
-__device__ void ripemd160sha256(const unsigned int x[8], unsigned int digest[5])
-{
-	unsigned int a1 = _RIPEMD160_IV[0];
-	unsigned int b1 = _RIPEMD160_IV[1];
-	unsigned int c1 = _RIPEMD160_IV[2];
-	unsigned int d1 = _RIPEMD160_IV[3];
-	unsigned int e1 = _RIPEMD160_IV[4];
-
-	const unsigned int x8 = 0x00000080;
-	const unsigned int x14 = 256;
-
-	/* round 1 */
-	FF(a1, b1, c1, d1, e1, x[0], 11);
-	FF(e1, a1, b1, c1, d1, x[1], 14);
-	FF(d1, e1, a1, b1, c1, x[2], 15);
-	FF(c1, d1, e1, a1, b1, x[3], 12);
-	FF(b1, c1, d1, e1, a1, x[4], 5);
-	FF(a1, b1, c1, d1, e1, x[5], 8);
-	FF(e1, a1, b1, c1, d1, x[6], 7);
-	FF(d1, e1, a1, b1, c1, x[7], 9);
-	FF(c1, d1, e1, a1, b1, x8, 11);
-	FF(b1, c1, d1, e1, a1, 0, 13);
-	FF(a1, b1, c1, d1, e1, 0, 14);
-	FF(e1, a1, b1, c1, d1, 0, 15);
-	FF(d1, e1, a1, b1, c1, 0, 6);
-	FF(c1, d1, e1, a1, b1, 0, 7);
-	FF(b1, c1, d1, e1, a1, x14, 9);
-	FF(a1, b1, c1, d1, e1, 0, 8);
-
-	/* round 2 */
-	GG(e1, a1, b1, c1, d1, x[7], 7);
-	GG(d1, e1, a1, b1, c1, x[4], 6);
-	GG(c1, d1, e1, a1, b1, 0, 8);
-	GG(b1, c1, d1, e1, a1, x[1], 13);
-	GG(a1, b1, c1, d1, e1, 0, 11);
-	GG(e1, a1, b1, c1, d1, x[6], 9);
-	GG(d1, e1, a1, b1, c1, 0, 7);
-	GG(c1, d1, e1, a1, b1, x[3], 15);
-	GG(b1, c1, d1, e1, a1, 0, 7);
-	GG(a1, b1, c1, d1, e1, x[0], 12);
-	GG(e1, a1, b1, c1, d1, 0, 15);
-	GG(d1, e1, a1, b1, c1, x[5], 9);
-	GG(c1, d1, e1, a1, b1, x[2], 11);
-	GG(b1, c1, d1, e1, a1, x14, 7);
-	GG(a1, b1, c1, d1, e1, 0, 13);
-	GG(e1, a1, b1, c1, d1, x8, 12);
-
-	/* round 3 */
-	HH(d1, e1, a1, b1, c1, x[3], 11);
-	HH(c1, d1, e1, a1, b1, 0, 13);
-	HH(b1, c1, d1, e1, a1, x14, 6);
-	HH(a1, b1, c1, d1, e1, x[4], 7);
-	HH(e1, a1, b1, c1, d1, 0, 14);
-	HH(d1, e1, a1, b1, c1, 0, 9);
-	HH(c1, d1, e1, a1, b1, x8, 13);
-	HH(b1, c1, d1, e1, a1, x[1], 15);
-	HH(a1, b1, c1, d1, e1, x[2], 14);
-	HH(e1, a1, b1, c1, d1, x[7], 8);
-	HH(d1, e1, a1, b1, c1, x[0], 13);
-	HH(c1, d1, e1, a1, b1, x[6], 6);
-	HH(b1, c1, d1, e1, a1, 0, 5);
-	HH(a1, b1, c1, d1, e1, 0, 12);
-	HH(e1, a1, b1, c1, d1, x[5], 7);
-	HH(d1, e1, a1, b1, c1, 0, 5);
-
-	/* round 4 */
-	II(c1, d1, e1, a1, b1, x[1], 11);
-	II(b1, c1, d1, e1, a1, 0, 12);
-	II(a1, b1, c1, d1, e1, 0, 14);
-	II(e1, a1, b1, c1, d1, 0, 15);
-	II(d1, e1, a1, b1, c1, x[0], 14);
-	II(c1, d1, e1, a1, b1, x8, 15);
-	II(b1, c1, d1, e1, a1, 0, 9);
-	II(a1, b1, c1, d1, e1, x[4], 8);
-	II(e1, a1, b1, c1, d1, 0, 9);
-	II(d1, e1, a1, b1, c1, x[3], 14);
-	II(c1, d1, e1, a1, b1, x[7], 5);
-	II(b1, c1, d1, e1, a1, 0, 6);
-	II(a1, b1, c1, d1, e1, x14, 8);
-	II(e1, a1, b1, c1, d1, x[5], 6);
-	II(d1, e1, a1, b1, c1, x[6], 5);
-	II(c1, d1, e1, a1, b1, x[2], 12);
-
-	/* round 5 */
-	JJ(b1, c1, d1, e1, a1, x[4], 9);
-	JJ(a1, b1, c1, d1, e1, x[0], 15);
-	JJ(e1, a1, b1, c1, d1, x[5], 5);
-	JJ(d1, e1, a1, b1, c1, 0, 11);
-	JJ(c1, d1, e1, a1, b1, x[7], 6);
-	JJ(b1, c1, d1, e1, a1, 0, 8);
-	JJ(a1, b1, c1, d1, e1, x[2], 13);
-	JJ(e1, a1, b1, c1, d1, 0, 12);
-	JJ(d1, e1, a1, b1, c1, x14, 5);
-	JJ(c1, d1, e1, a1, b1, x[1], 12);
-	JJ(b1, c1, d1, e1, a1, x[3], 13);
-	JJ(a1, b1, c1, d1, e1, x8, 14);
-	JJ(e1, a1, b1, c1, d1, 0, 11);
-	JJ(d1, e1, a1, b1, c1, x[6], 8);
-	JJ(c1, d1, e1, a1, b1, 0, 5);
-	JJ(b1, c1, d1, e1, a1, 0, 6);
-
-	unsigned int a2 = _RIPEMD160_IV[0];
-	unsigned int b2 = _RIPEMD160_IV[1];
-	unsigned int c2 = _RIPEMD160_IV[2];
-	unsigned int d2 = _RIPEMD160_IV[3];
-	unsigned int e2 = _RIPEMD160_IV[4];
-
-	/* parallel round 1 */
-	JJJ(a2, b2, c2, d2, e2, x[5], 8);
-	JJJ(e2, a2, b2, c2, d2, x14, 9);
-	JJJ(d2, e2, a2, b2, c2, x[7], 9);
-	JJJ(c2, d2, e2, a2, b2, x[0], 11);
-	JJJ(b2, c2, d2, e2, a2, 0, 13);
-	JJJ(a2, b2, c2, d2, e2, x[2], 15);
-	JJJ(e2, a2, b2, c2, d2, 0, 15);
-	JJJ(d2, e2, a2, b2, c2, x[4], 5);
-	JJJ(c2, d2, e2, a2, b2, 0, 7);
-	JJJ(b2, c2, d2, e2, a2, x[6], 7);
-	JJJ(a2, b2, c2, d2, e2, 0, 8);
-	JJJ(e2, a2, b2, c2, d2, x8, 11);
-	JJJ(d2, e2, a2, b2, c2, x[1], 14);
-	JJJ(c2, d2, e2, a2, b2, 0, 14);
-	JJJ(b2, c2, d2, e2, a2, x[3], 12);
-	JJJ(a2, b2, c2, d2, e2, 0, 6);
-
-	/* parallel round 2 */
-	III(e2, a2, b2, c2, d2, x[6], 9);
-	III(d2, e2, a2, b2, c2, 0, 13);
-	III(c2, d2, e2, a2, b2, x[3], 15);
-	III(b2, c2, d2, e2, a2, x[7], 7);
-	III(a2, b2, c2, d2, e2, x[0], 12);
-	III(e2, a2, b2, c2, d2, 0, 8);
-	III(d2, e2, a2, b2, c2, x[5], 9);
-	III(c2, d2, e2, a2, b2, 0, 11);
-	III(b2, c2, d2, e2, a2, x14, 7);
-	III(a2, b2, c2, d2, e2, 0, 7);
-	III(e2, a2, b2, c2, d2, x8, 12);
-	III(d2, e2, a2, b2, c2, 0, 7);
-	III(c2, d2, e2, a2, b2, x[4], 6);
-	III(b2, c2, d2, e2, a2, 0, 15);
-	III(a2, b2, c2, d2, e2, x[1], 13);
-	III(e2, a2, b2, c2, d2, x[2], 11);
-
-	/* parallel round 3 */
-	HHH(d2, e2, a2, b2, c2, 0, 9);
-	HHH(c2, d2, e2, a2, b2, x[5], 7);
-	HHH(b2, c2, d2, e2, a2, x[1], 15);
-	HHH(a2, b2, c2, d2, e2, x[3], 11);
-	HHH(e2, a2, b2, c2, d2, x[7], 8);
-	HHH(d2, e2, a2, b2, c2, x14, 6);
-	HHH(c2, d2, e2, a2, b2, x[6], 6);
-	HHH(b2, c2, d2, e2, a2, 0, 14);
-	HHH(a2, b2, c2, d2, e2, 0, 12);
-	HHH(e2, a2, b2, c2, d2, x8, 13);
-	HHH(d2, e2, a2, b2, c2, 0, 5);
-	HHH(c2, d2, e2, a2, b2, x[2], 14);
-	HHH(b2, c2, d2, e2, a2, 0, 13);
-	HHH(a2, b2, c2, d2, e2, x[0], 13);
-	HHH(e2, a2, b2, c2, d2, x[4], 7);
-	HHH(d2, e2, a2, b2, c2, 0, 5);
-
-	/* parallel round 4 */
-	GGG(c2, d2, e2, a2, b2, x8, 15);
-	GGG(b2, c2, d2, e2, a2, x[6], 5);
-	GGG(a2, b2, c2, d2, e2, x[4], 8);
-	GGG(e2, a2, b2, c2, d2, x[1], 11);
-	GGG(d2, e2, a2, b2, c2, x[3], 14);
-	GGG(c2, d2, e2, a2, b2, 0, 14);
-	GGG(b2, c2, d2, e2, a2, 0, 6);
-	GGG(a2, b2, c2, d2, e2, x[0], 14);
-	GGG(e2, a2, b2, c2, d2, x[5], 6);
-	GGG(d2, e2, a2, b2, c2, 0, 9);
-	GGG(c2, d2, e2, a2, b2, x[2], 12);
-	GGG(b2, c2, d2, e2, a2, 0, 9);
-	GGG(a2, b2, c2, d2, e2, 0, 12);
-	GGG(e2, a2, b2, c2, d2, x[7], 5);
-	GGG(d2, e2, a2, b2, c2, 0, 15);
-	GGG(c2, d2, e2, a2, b2, x14, 8);
-
-	/* parallel round 5 */
-	FFF(b2, c2, d2, e2, a2, 0, 8);
-	FFF(a2, b2, c2, d2, e2, 0, 5);
-	FFF(e2, a2, b2, c2, d2, 0, 12);
-	FFF(d2, e2, a2, b2, c2, x[4], 9);
-	FFF(c2, d2, e2, a2, b2, x[1], 12);
-	FFF(b2, c2, d2, e2, a2, x[5], 5);
-	FFF(a2, b2, c2, d2, e2, x8, 14);
-	FFF(e2, a2, b2, c2, d2, x[7], 6);
-	FFF(d2, e2, a2, b2, c2, x[6], 8);
-	FFF(c2, d2, e2, a2, b2, x[2], 13);
-	FFF(b2, c2, d2, e2, a2, 0, 6);
-	FFF(a2, b2, c2, d2, e2, x14, 5);
-	FFF(e2, a2, b2, c2, d2, x[0], 15);
-	FFF(d2, e2, a2, b2, c2, x[3], 13);
-	FFF(c2, d2, e2, a2, b2, 0, 11);
-	FFF(b2, c2, d2, e2, a2, 0, 11);
-
-	digest[0] = _RIPEMD160_IV[1] + c1 + d2;
-	digest[1] = _RIPEMD160_IV[2] + d1 + e2;
-	digest[2] = _RIPEMD160_IV[3] + e1 + a2;
-	digest[3] = _RIPEMD160_IV[4] + a1 + b2;
-	digest[4] = _RIPEMD160_IV[0] + b1 + c2;
-}
-
-
-
-__device__ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5])
-{
-	unsigned int a1 = _RIPEMD160_IV[0];
-	unsigned int b1 = _RIPEMD160_IV[1];
-	unsigned int c1 = _RIPEMD160_IV[2];
-	unsigned int d1 = _RIPEMD160_IV[3];
-	unsigned int e1 = _RIPEMD160_IV[4];
-
-	const unsigned int x8 = 0x00000080;
-	const unsigned int x14 = 256;
-
-	/* round 1 */
-	FF(a1, b1, c1, d1, e1, x[0], 11);
-	FF(e1, a1, b1, c1, d1, x[1], 14);
-	FF(d1, e1, a1, b1, c1, x[2], 15);
-	FF(c1, d1, e1, a1, b1, x[3], 12);
-	FF(b1, c1, d1, e1, a1, x[4], 5);
-	FF(a1, b1, c1, d1, e1, x[5], 8);
-	FF(e1, a1, b1, c1, d1, x[6], 7);
-	FF(d1, e1, a1, b1, c1, x[7], 9);
-	FF(c1, d1, e1, a1, b1, x8, 11);
-	FF(b1, c1, d1, e1, a1, 0, 13);
-	FF(a1, b1, c1, d1, e1, 0, 14);
-	FF(e1, a1, b1, c1, d1, 0, 15);
-	FF(d1, e1, a1, b1, c1, 0, 6);
-	FF(c1, d1, e1, a1, b1, 0, 7);
-	FF(b1, c1, d1, e1, a1, x14, 9);
-	FF(a1, b1, c1, d1, e1, 0, 8);
-
-	/* round 2 */
-	GG(e1, a1, b1, c1, d1, x[7], 7);
-	GG(d1, e1, a1, b1, c1, x[4], 6);
-	GG(c1, d1, e1, a1, b1, 0, 8);
-	GG(b1, c1, d1, e1, a1, x[1], 13);
-	GG(a1, b1, c1, d1, e1, 0, 11);
-	GG(e1, a1, b1, c1, d1, x[6], 9);
-	GG(d1, e1, a1, b1, c1, 0, 7);
-	GG(c1, d1, e1, a1, b1, x[3], 15);
-	GG(b1, c1, d1, e1, a1, 0, 7);
-	GG(a1, b1, c1, d1, e1, x[0], 12);
-	GG(e1, a1, b1, c1, d1, 0, 15);
-	GG(d1, e1, a1, b1, c1, x[5], 9);
-	GG(c1, d1, e1, a1, b1, x[2], 11);
-	GG(b1, c1, d1, e1, a1, x14, 7);
-	GG(a1, b1, c1, d1, e1, 0, 13);
-	GG(e1, a1, b1, c1, d1, x8, 12);
-
-	/* round 3 */
-	HH(d1, e1, a1, b1, c1, x[3], 11);
-	HH(c1, d1, e1, a1, b1, 0, 13);
-	HH(b1, c1, d1, e1, a1, x14, 6);
-	HH(a1, b1, c1, d1, e1, x[4], 7);
-	HH(e1, a1, b1, c1, d1, 0, 14);
-	HH(d1, e1, a1, b1, c1, 0, 9);
-	HH(c1, d1, e1, a1, b1, x8, 13);
-	HH(b1, c1, d1, e1, a1, x[1], 15);
-	HH(a1, b1, c1, d1, e1, x[2], 14);
-	HH(e1, a1, b1, c1, d1, x[7], 8);
-	HH(d1, e1, a1, b1, c1, x[0], 13);
-	HH(c1, d1, e1, a1, b1, x[6], 6);
-	HH(b1, c1, d1, e1, a1, 0, 5);
-	HH(a1, b1, c1, d1, e1, 0, 12);
-	HH(e1, a1, b1, c1, d1, x[5], 7);
-	HH(d1, e1, a1, b1, c1, 0, 5);
-
-	/* round 4 */
-	II(c1, d1, e1, a1, b1, x[1], 11);
-	II(b1, c1, d1, e1, a1, 0, 12);
-	II(a1, b1, c1, d1, e1, 0, 14);
-	II(e1, a1, b1, c1, d1, 0, 15);
-	II(d1, e1, a1, b1, c1, x[0], 14);
-	II(c1, d1, e1, a1, b1, x8, 15);
-	II(b1, c1, d1, e1, a1, 0, 9);
-	II(a1, b1, c1, d1, e1, x[4], 8);
-	II(e1, a1, b1, c1, d1, 0, 9);
-	II(d1, e1, a1, b1, c1, x[3], 14);
-	II(c1, d1, e1, a1, b1, x[7], 5);
-	II(b1, c1, d1, e1, a1, 0, 6);
-	II(a1, b1, c1, d1, e1, x14, 8);
-	II(e1, a1, b1, c1, d1, x[5], 6);
-	II(d1, e1, a1, b1, c1, x[6], 5);
-	II(c1, d1, e1, a1, b1, x[2], 12);
-
-	/* round 5 */
-	JJ(b1, c1, d1, e1, a1, x[4], 9);
-	JJ(a1, b1, c1, d1, e1, x[0], 15);
-	JJ(e1, a1, b1, c1, d1, x[5], 5);
-	JJ(d1, e1, a1, b1, c1, 0, 11);
-	JJ(c1, d1, e1, a1, b1, x[7], 6);
-	JJ(b1, c1, d1, e1, a1, 0, 8);
-	JJ(a1, b1, c1, d1, e1, x[2], 13);
-	JJ(e1, a1, b1, c1, d1, 0, 12);
-	JJ(d1, e1, a1, b1, c1, x14, 5);
-	JJ(c1, d1, e1, a1, b1, x[1], 12);
-	JJ(b1, c1, d1, e1, a1, x[3], 13);
-	JJ(a1, b1, c1, d1, e1, x8, 14);
-	JJ(e1, a1, b1, c1, d1, 0, 11);
-	JJ(d1, e1, a1, b1, c1, x[6], 8);
-	JJ(c1, d1, e1, a1, b1, 0, 5);
-	JJ(b1, c1, d1, e1, a1, 0, 6);
-
-	unsigned int a2 = _RIPEMD160_IV[0];
-	unsigned int b2 = _RIPEMD160_IV[1];
-	unsigned int c2 = _RIPEMD160_IV[2];
-	unsigned int d2 = _RIPEMD160_IV[3];
-	unsigned int e2 = _RIPEMD160_IV[4];
-
-	/* parallel round 1 */
-	JJJ(a2, b2, c2, d2, e2, x[5], 8);
-	JJJ(e2, a2, b2, c2, d2, x14, 9);
-	JJJ(d2, e2, a2, b2, c2, x[7], 9);
-	JJJ(c2, d2, e2, a2, b2, x[0], 11);
-	JJJ(b2, c2, d2, e2, a2, 0, 13);
-	JJJ(a2, b2, c2, d2, e2, x[2], 15);
-	JJJ(e2, a2, b2, c2, d2, 0, 15);
-	JJJ(d2, e2, a2, b2, c2, x[4], 5);
-	JJJ(c2, d2, e2, a2, b2, 0, 7);
-	JJJ(b2, c2, d2, e2, a2, x[6], 7);
-	JJJ(a2, b2, c2, d2, e2, 0, 8);
-	JJJ(e2, a2, b2, c2, d2, x8, 11);
-	JJJ(d2, e2, a2, b2, c2, x[1], 14);
-	JJJ(c2, d2, e2, a2, b2, 0, 14);
-	JJJ(b2, c2, d2, e2, a2, x[3], 12);
-	JJJ(a2, b2, c2, d2, e2, 0, 6);
-
-	/* parallel round 2 */
-	III(e2, a2, b2, c2, d2, x[6], 9);
-	III(d2, e2, a2, b2, c2, 0, 13);
-	III(c2, d2, e2, a2, b2, x[3], 15);
-	III(b2, c2, d2, e2, a2, x[7], 7);
-	III(a2, b2, c2, d2, e2, x[0], 12);
-	III(e2, a2, b2, c2, d2, 0, 8);
-	III(d2, e2, a2, b2, c2, x[5], 9);
-	III(c2, d2, e2, a2, b2, 0, 11);
-	III(b2, c2, d2, e2, a2, x14, 7);
-	III(a2, b2, c2, d2, e2, 0, 7);
-	III(e2, a2, b2, c2, d2, x8, 12);
-	III(d2, e2, a2, b2, c2, 0, 7);
-	III(c2, d2, e2, a2, b2, x[4], 6);
-	III(b2, c2, d2, e2, a2, 0, 15);
-	III(a2, b2, c2, d2, e2, x[1], 13);
-	III(e2, a2, b2, c2, d2, x[2], 11);
-
-	/* parallel round 3 */
-	HHH(d2, e2, a2, b2, c2, 0, 9);
-	HHH(c2, d2, e2, a2, b2, x[5], 7);
-	HHH(b2, c2, d2, e2, a2, x[1], 15);
-	HHH(a2, b2, c2, d2, e2, x[3], 11);
-	HHH(e2, a2, b2, c2, d2, x[7], 8);
-	HHH(d2, e2, a2, b2, c2, x14, 6);
-	HHH(c2, d2, e2, a2, b2, x[6], 6);
-	HHH(b2, c2, d2, e2, a2, 0, 14);
-	HHH(a2, b2, c2, d2, e2, 0, 12);
-	HHH(e2, a2, b2, c2, d2, x8, 13);
-	HHH(d2, e2, a2, b2, c2, 0, 5);
-	HHH(c2, d2, e2, a2, b2, x[2], 14);
-	HHH(b2, c2, d2, e2, a2, 0, 13);
-	HHH(a2, b2, c2, d2, e2, x[0], 13);
-	HHH(e2, a2, b2, c2, d2, x[4], 7);
-	HHH(d2, e2, a2, b2, c2, 0, 5);
-
-	/* parallel round 4 */
-	GGG(c2, d2, e2, a2, b2, x8, 15);
-	GGG(b2, c2, d2, e2, a2, x[6], 5);
-	GGG(a2, b2, c2, d2, e2, x[4], 8);
-	GGG(e2, a2, b2, c2, d2, x[1], 11);
-	GGG(d2, e2, a2, b2, c2, x[3], 14);
-	GGG(c2, d2, e2, a2, b2, 0, 14);
-	GGG(b2, c2, d2, e2, a2, 0, 6);
-	GGG(a2, b2, c2, d2, e2, x[0], 14);
-	GGG(e2, a2, b2, c2, d2, x[5], 6);
-	GGG(d2, e2, a2, b2, c2, 0, 9);
-	GGG(c2, d2, e2, a2, b2, x[2], 12);
-	GGG(b2, c2, d2, e2, a2, 0, 9);
-	GGG(a2, b2, c2, d2, e2, 0, 12);
-	GGG(e2, a2, b2, c2, d2, x[7], 5);
-	GGG(d2, e2, a2, b2, c2, 0, 15);
-	GGG(c2, d2, e2, a2, b2, x14, 8);
-
-	/* parallel round 5 */
-	FFF(b2, c2, d2, e2, a2, 0, 8);
-	FFF(a2, b2, c2, d2, e2, 0, 5);
-	FFF(e2, a2, b2, c2, d2, 0, 12);
-	FFF(d2, e2, a2, b2, c2, x[4], 9);
-	FFF(c2, d2, e2, a2, b2, x[1], 12);
-	FFF(b2, c2, d2, e2, a2, x[5], 5);
-	FFF(a2, b2, c2, d2, e2, x8, 14);
-	FFF(e2, a2, b2, c2, d2, x[7], 6);
-	FFF(d2, e2, a2, b2, c2, x[6], 8);
-	FFF(c2, d2, e2, a2, b2, x[2], 13);
-	FFF(b2, c2, d2, e2, a2, 0, 6);
-	FFF(a2, b2, c2, d2, e2, x14, 5);
-	FFF(e2, a2, b2, c2, d2, x[0], 15);
-	FFF(d2, e2, a2, b2, c2, x[3], 13);
-	FFF(c2, d2, e2, a2, b2, 0, 11);
-	FFF(b2, c2, d2, e2, a2, 0, 11);
-
-	digest[0] = c1 + d2;
-	digest[1] = d1 + e2;
-	digest[2] = e1 + a2;
-	digest[3] = a1 + b2;
-	digest[4] = b1 + c2;
-}
-#endif
\ No newline at end of file
diff --git a/cudaMath/secp256k1.cuh b/cudaMath/secp256k1.cuh
deleted file mode 100644
index 88a3fed..0000000
--- a/cudaMath/secp256k1.cuh
+++ /dev/null
@@ -1,802 +0,0 @@
-#ifndef _SECP256K1_CUH
-#define _SECP256K1_CUH
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-
-#include "ptx.cuh"
-
-
-/**
- Prime modulus 2^256 - 2^32 - 977
- */
-__constant__ static unsigned int _P[8] = {
-	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFC2F
-};
-
-/**
- Base point X
- */
-__constant__ static unsigned int _GX[8] = {
-	0x79BE667E, 0xF9DCBBAC, 0x55A06295, 0xCE870B07, 0x029BFCDB, 0x2DCE28D9, 0x59F2815B, 0x16F81798
-};
-
-
-/**
- Base point Y
- */
-__constant__ static unsigned int _GY[8] = {
-	0x483ADA77, 0x26A3C465, 0x5DA4FBFC, 0x0E1108A8, 0xFD17B448, 0xA6855419, 0x9C47D08F, 0xFB10D4B8
-};
-
-
-/**
- * Group order
- */
-__constant__ static unsigned int _N[8] = {
-	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xBAAEDCE6, 0xAF48A03B, 0xBFD25E8C, 0xD0364141
-};
-
-__constant__ static unsigned int _BETA[8] = {
-	0x7AE96A2B, 0x657C0710, 0x6E64479E, 0xAC3434E9, 0x9CF04975, 0x12F58995, 0xC1396C28, 0x719501EE
-};
-
-
-__constant__ static unsigned int _LAMBDA[8] = {
-	0x5363AD4C, 0xC05C30E0, 0xA5261C02, 0x8812645A, 0x122E22EA, 0x20816678, 0xDF02967C, 0x1B23BD72
-};
-
-
-__device__ __forceinline__ bool isInfinity(const unsigned int x[8])
-{
-	bool isf = true;
-
-	for(int i = 0; i < 8; i++) {
-		if(x[i] != 0xffffffff) {
-			isf = false;
-		}
-	}
-
-	return isf;
-}
-
-__device__ __forceinline__ static void copyBigInt(const unsigned int src[8], unsigned int dest[8])
-{
-	for(int i = 0; i < 8; i++) {
-		dest[i] = src[i];
-	}
-}
-
-__device__ static bool equal(const unsigned int *a, const unsigned int *b)
-{
-	bool eq = true;
-
-	for(int i = 0; i < 8; i++) {
-		eq &= (a[i] == b[i]);
-	}
-
-	return eq;
-}
-
-/**
- * Reads an 8-word big integer from device memory
- */
-__device__ static void readInt(const unsigned int *ara, int idx, unsigned int x[8])
-{
-	int totalThreads = gridDim.x * blockDim.x;
-
-	int base = idx * totalThreads * 8;
-
-	int threadId = blockDim.x * blockIdx.x + threadIdx.x;
-
-	int index = base + threadId;
-
-	for (int i = 0; i < 8; i++) {
-		x[i] = ara[index];
-		index += totalThreads;
-	}
-}
-
-__device__ static unsigned int readIntLSW(const unsigned int *ara, int idx)
-{
-	int totalThreads = gridDim.x * blockDim.x;
-
-	int base = idx * totalThreads * 8;
-
-	int threadId = blockDim.x * blockIdx.x + threadIdx.x;
-
-	int index = base + threadId;
-
-	return ara[index + totalThreads * 7];
-}
-
-/**
- * Writes an 8-word big integer to device memory
- */
-__device__ static void writeInt(unsigned int *ara, int idx, const unsigned int x[8])
-{
-	int totalThreads = gridDim.x * blockDim.x;
-
-	int base = idx * totalThreads * 8;
-
-	int threadId = blockDim.x * blockIdx.x + threadIdx.x;
-
-	int index = base + threadId;
-
-	for (int i = 0; i < 8; i++) {
-		ara[index] = x[i];
-		index += totalThreads;
-	}
-}
-
-/**
- * Subtraction mod p
- */
-__device__ static void subModP(const unsigned int a[8], const unsigned int b[8], unsigned int c[8])
-{
-	sub_cc(c[7], a[7], b[7]);
-	subc_cc(c[6], a[6], b[6]);
-	subc_cc(c[5], a[5], b[5]);
-	subc_cc(c[4], a[4], b[4]);
-	subc_cc(c[3], a[3], b[3]);
-	subc_cc(c[2], a[2], b[2]);
-	subc_cc(c[1], a[1], b[1]);
-	subc_cc(c[0], a[0], b[0]);
-
-	unsigned int borrow = 0;
-	subc(borrow, 0, 0);
-
-	if (borrow) {
-		add_cc(c[7], c[7], _P[7]);
-		addc_cc(c[6], c[6], _P[6]);
-		addc_cc(c[5], c[5], _P[5]);
-		addc_cc(c[4], c[4], _P[4]);
-		addc_cc(c[3], c[3], _P[3]);
-		addc_cc(c[2], c[2], _P[2]);
-		addc_cc(c[1], c[1], _P[1]);
-		addc(c[0], c[0], _P[0]);
-	}
-}
-
-__device__ static unsigned int add(const unsigned int a[8], const unsigned int b[8], unsigned int c[8])
-{
-	add_cc(c[7], a[7], b[7]);
-	addc_cc(c[6], a[6], b[6]);
-	addc_cc(c[5], a[5], b[5]);
-	addc_cc(c[4], a[4], b[4]);
-	addc_cc(c[3], a[3], b[3]);
-	addc_cc(c[2], a[2], b[2]);
-	addc_cc(c[1], a[1], b[1]);
-	addc_cc(c[0], a[0], b[0]);
-
-	unsigned int carry = 0;
-	addc(carry, 0, 0);
-
-	return carry;
-}
-
-__device__ static unsigned int sub(const unsigned int a[8], const unsigned int b[8], unsigned int c[8])
-{
-	sub_cc(c[7], a[7], b[7]);
-	subc_cc(c[6], a[6], b[6]);
-	subc_cc(c[5], a[5], b[5]);
-	subc_cc(c[4], a[4], b[4]);
-	subc_cc(c[3], a[3], b[3]);
-	subc_cc(c[2], a[2], b[2]);
-	subc_cc(c[1], a[1], b[1]);
-	subc_cc(c[0], a[0], b[0]);
-
-	unsigned int borrow = 0;
-	subc(borrow, 0, 0);
-
-	return (borrow & 0x01);
-}
-
-
-__device__ static void addModP(const unsigned int a[8], const unsigned int b[8], unsigned int c[8])
-{
-	add_cc(c[7], a[7], b[7]);
-	addc_cc(c[6], a[6], b[6]);
-	addc_cc(c[5], a[5], b[5]);
-	addc_cc(c[4], a[4], b[4]);
-	addc_cc(c[3], a[3], b[3]);
-	addc_cc(c[2], a[2], b[2]);
-	addc_cc(c[1], a[1], b[1]);
-	addc_cc(c[0], a[0], b[0]);
-
-	unsigned int carry = 0;
-	addc(carry, 0, 0);
-
-	bool gt = false;
-	for(int i = 0; i < 8; i++) {
-		if(c[i] > _P[i]) {
-			gt = true;
-			break;
-		} else if(c[i] < _P[i]) {
-			break;
-		}
-	}
-
-	if(carry || gt) {
-		sub_cc(c[7], c[7], _P[7]);
-		subc_cc(c[6], c[6], _P[6]);
-		subc_cc(c[5], c[5], _P[5]);
-		subc_cc(c[4], c[4], _P[4]);
-		subc_cc(c[3], c[3], _P[3]);
-		subc_cc(c[2], c[2], _P[2]);
-		subc_cc(c[1], c[1], _P[1]);
-		subc(c[0], c[0], _P[0]);
-	}
-}
-
-
-
-__device__ static void mulModP(const unsigned int a[8], const unsigned int b[8], unsigned int c[8])
-{
-	unsigned int high[8] = { 0 };
-
-	unsigned int t = a[7];
-
-	// a[7] * b (low)
-	for(int i = 7; i >= 0; i--) {
-		c[i] = t * b[i];
-	}
-
-	// a[7] * b (high)
-	mad_hi_cc(c[6], t, b[7], c[6]);
-	madc_hi_cc(c[5], t, b[6], c[5]);
-	madc_hi_cc(c[4], t, b[5], c[4]);
-	madc_hi_cc(c[3], t, b[4], c[3]);
-	madc_hi_cc(c[2], t, b[3], c[2]);
-	madc_hi_cc(c[1], t, b[2], c[1]);
-	madc_hi_cc(c[0], t, b[1], c[0]);
-	madc_hi(high[7], t, b[0], high[7]);
-
-
-
-	// a[6] * b (low)
-	t = a[6];
-	mad_lo_cc(c[6], t, b[7], c[6]);
-	madc_lo_cc(c[5], t, b[6], c[5]);
-	madc_lo_cc(c[4], t, b[5], c[4]);
-	madc_lo_cc(c[3], t, b[4], c[3]);
-	madc_lo_cc(c[2], t, b[3], c[2]);
-	madc_lo_cc(c[1], t, b[2], c[1]);
-	madc_lo_cc(c[0], t, b[1], c[0]);
-	madc_lo_cc(high[7], t, b[0], high[7]);
-	addc(high[6], high[6], 0);
-
-	// a[6] * b (high)
-	mad_hi_cc(c[5], t, b[7], c[5]);
-	madc_hi_cc(c[4], t, b[6], c[4]);
-	madc_hi_cc(c[3], t, b[5], c[3]);
-	madc_hi_cc(c[2], t, b[4], c[2]);
-	madc_hi_cc(c[1], t, b[3], c[1]);
-	madc_hi_cc(c[0], t, b[2], c[0]);
-	madc_hi_cc(high[7], t, b[1], high[7]);
-	madc_hi(high[6], t, b[0], high[6]);
-
-	// a[5] * b (low)
-	t = a[5];
-	mad_lo_cc(c[5], t, b[7], c[5]);
-	madc_lo_cc(c[4], t, b[6], c[4]);
-	madc_lo_cc(c[3], t, b[5], c[3]);
-	madc_lo_cc(c[2], t, b[4], c[2]);
-	madc_lo_cc(c[1], t, b[3], c[1]);
-	madc_lo_cc(c[0], t, b[2], c[0]);
-	madc_lo_cc(high[7], t, b[1], high[7]);
-	madc_lo_cc(high[6], t, b[0], high[6]);
-	addc(high[5], high[5], 0);
-
-	// a[5] * b (high)
-	mad_hi_cc(c[4], t, b[7], c[4]);
-	madc_hi_cc(c[3], t, b[6], c[3]);
-	madc_hi_cc(c[2], t, b[5], c[2]);
-	madc_hi_cc(c[1], t, b[4], c[1]);
-	madc_hi_cc(c[0], t, b[3], c[0]);
-	madc_hi_cc(high[7], t, b[2], high[7]);
-	madc_hi_cc(high[6], t, b[1], high[6]);
-	madc_hi(high[5], t, b[0], high[5]);
-
-
-
-	// a[4] * b (low)
-	t = a[4];
-	mad_lo_cc(c[4], t, b[7], c[4]);
-	madc_lo_cc(c[3], t, b[6], c[3]);
-	madc_lo_cc(c[2], t, b[5], c[2]);
-	madc_lo_cc(c[1], t, b[4], c[1]);
-	madc_lo_cc(c[0], t, b[3], c[0]);
-	madc_lo_cc(high[7], t, b[2], high[7]);
-	madc_lo_cc(high[6], t, b[1], high[6]);
-	madc_lo_cc(high[5], t, b[0], high[5]);
-	addc(high[4], high[4], 0);
-
-	// a[4] * b (high)
-	mad_hi_cc(c[3], t, b[7], c[3]);
-	madc_hi_cc(c[2], t, b[6], c[2]);
-	madc_hi_cc(c[1], t, b[5], c[1]);
-	madc_hi_cc(c[0], t, b[4], c[0]);
-	madc_hi_cc(high[7], t, b[3], high[7]);
-	madc_hi_cc(high[6], t, b[2], high[6]);
-	madc_hi_cc(high[5], t, b[1], high[5]);
-	madc_hi(high[4], t, b[0], high[4]);
-
-
-
-	// a[3] * b (low)
-	t = a[3];
-	mad_lo_cc(c[3], t, b[7], c[3]);
-	madc_lo_cc(c[2], t, b[6], c[2]);
-	madc_lo_cc(c[1], t, b[5], c[1]);
-	madc_lo_cc(c[0], t, b[4], c[0]);
-	madc_lo_cc(high[7], t, b[3], high[7]);
-	madc_lo_cc(high[6], t, b[2], high[6]);
-	madc_lo_cc(high[5], t, b[1], high[5]);
-	madc_lo_cc(high[4], t, b[0], high[4]);
-	addc(high[3], high[3], 0);
-
-	// a[3] * b (high)
-	mad_hi_cc(c[2], t, b[7], c[2]);
-	madc_hi_cc(c[1], t, b[6], c[1]);
-	madc_hi_cc(c[0], t, b[5], c[0]);
-	madc_hi_cc(high[7], t, b[4], high[7]);
-	madc_hi_cc(high[6], t, b[3], high[6]);
-	madc_hi_cc(high[5], t, b[2], high[5]);
-	madc_hi_cc(high[4], t, b[1], high[4]);
-	madc_hi(high[3], t, b[0], high[3]);
-
-
-
-	// a[2] * b (low)
-	t = a[2];
-	mad_lo_cc(c[2], t, b[7], c[2]);
-	madc_lo_cc(c[1], t, b[6], c[1]);
-	madc_lo_cc(c[0], t, b[5], c[0]);
-	madc_lo_cc(high[7], t, b[4], high[7]);
-	madc_lo_cc(high[6], t, b[3], high[6]);
-	madc_lo_cc(high[5], t, b[2], high[5]);
-	madc_lo_cc(high[4], t, b[1], high[4]);
-	madc_lo_cc(high[3], t, b[0], high[3]);
-	addc(high[2], high[2], 0);
-
-	// a[2] * b (high)
-	mad_hi_cc(c[1], t, b[7], c[1]);
-	madc_hi_cc(c[0], t, b[6], c[0]);
-	madc_hi_cc(high[7], t, b[5], high[7]);
-	madc_hi_cc(high[6], t, b[4], high[6]);
-	madc_hi_cc(high[5], t, b[3], high[5]);
-	madc_hi_cc(high[4], t, b[2], high[4]);
-	madc_hi_cc(high[3], t, b[1], high[3]);
-	madc_hi(high[2], t, b[0], high[2]);
-
-
-
-	// a[1] * b (low)
-	t = a[1];
-	mad_lo_cc(c[1], t, b[7], c[1]);
-	madc_lo_cc(c[0], t, b[6], c[0]);
-	madc_lo_cc(high[7], t, b[5], high[7]);
-	madc_lo_cc(high[6], t, b[4], high[6]);
-	madc_lo_cc(high[5], t, b[3], high[5]);
-	madc_lo_cc(high[4], t, b[2], high[4]);
-	madc_lo_cc(high[3], t, b[1], high[3]);
-	madc_lo_cc(high[2], t, b[0], high[2]);
-	addc(high[1], high[1], 0);
-
-	// a[1] * b (high)
-	mad_hi_cc(c[0], t, b[7], c[0]);
-	madc_hi_cc(high[7], t, b[6], high[7]);
-	madc_hi_cc(high[6], t, b[5], high[6]);
-	madc_hi_cc(high[5], t, b[4], high[5]);
-	madc_hi_cc(high[4], t, b[3], high[4]);
-	madc_hi_cc(high[3], t, b[2], high[3]);
-	madc_hi_cc(high[2], t, b[1], high[2]);
-	madc_hi(high[1], t, b[0], high[1]);
-
-
-
-	// a[0] * b (low)
-	t = a[0];
-	mad_lo_cc(c[0], t, b[7], c[0]);
-	madc_lo_cc(high[7], t, b[6], high[7]);
-	madc_lo_cc(high[6], t, b[5], high[6]);
-	madc_lo_cc(high[5], t, b[4], high[5]);
-	madc_lo_cc(high[4], t, b[3], high[4]);
-	madc_lo_cc(high[3], t, b[2], high[3]);
-	madc_lo_cc(high[2], t, b[1], high[2]);
-	madc_lo_cc(high[1], t, b[0], high[1]);
-	addc(high[0], high[0], 0);
-
-	// a[0] * b (high)
-	mad_hi_cc(high[7], t, b[7], high[7]);
-	madc_hi_cc(high[6], t, b[6], high[6]);
-	madc_hi_cc(high[5], t, b[5], high[5]);
-	madc_hi_cc(high[4], t, b[4], high[4]);
-	madc_hi_cc(high[3], t, b[3], high[3]);
-	madc_hi_cc(high[2], t, b[2], high[2]);
-	madc_hi_cc(high[1], t, b[1], high[1]);
-	madc_hi(high[0], t, b[0], high[0]);
-
-
-
-	// At this point we have 16 32-bit words representing a 512-bit value
-	// high[0 ... 7] and c[0 ... 7]
-	const unsigned int s = 977;
-
-	// Store high[6] and high[7] since they will be overwritten
-	unsigned int high7 = high[7];
-	unsigned int high6 = high[6];
-
-
-	// Take high 256 bits, multiply by 2^32, add to low 256 bits
-	// That is, take high[0 ... 7], shift it left 1 word and add it to c[0 ... 7]
-	add_cc(c[6], high[7], c[6]);
-	addc_cc(c[5], high[6], c[5]);
-	addc_cc(c[4], high[5], c[4]);
-	addc_cc(c[3], high[4], c[3]);
-	addc_cc(c[2], high[3], c[2]);
-	addc_cc(c[1], high[2], c[1]);
-	addc_cc(c[0], high[1], c[0]);
-	addc_cc(high[7], high[0], 0);
-	addc(high[6], 0, 0);
-
-
-	// Take high 256 bits, multiply by 977, add to low 256 bits
-	// That is, take high[0 ... 5], high6, high7, multiply by 977 and add to c[0 ... 7]
-	mad_lo_cc(c[7], high7, s, c[7]);
-	madc_lo_cc(c[6], high6, s, c[6]);
-	madc_lo_cc(c[5], high[5], s, c[5]);
-	madc_lo_cc(c[4], high[4], s, c[4]);
-	madc_lo_cc(c[3], high[3], s, c[3]);
-	madc_lo_cc(c[2], high[2], s, c[2]);
-	madc_lo_cc(c[1], high[1], s, c[1]);
-	madc_lo_cc(c[0], high[0], s, c[0]);
-	addc_cc(high[7], high[7], 0);
-	addc(high[6], high[6], 0);
-
-
-	mad_hi_cc(c[6], high7, s, c[6]);
-	madc_hi_cc(c[5], high6, s, c[5]);
-	madc_hi_cc(c[4], high[5], s, c[4]);
-	madc_hi_cc(c[3], high[4], s, c[3]);
-	madc_hi_cc(c[2], high[3], s, c[2]);
-	madc_hi_cc(c[1], high[2], s, c[1]);
-	madc_hi_cc(c[0], high[1], s, c[0]);
-	madc_hi_cc(high[7], high[0], s, high[7]);
-	addc(high[6], high[6], 0);
-
-
-	// Repeat the same steps, but this time we only need to handle high[6] and high[7]
-	high7 = high[7];
-	high6 = high[6];
-
-	// Take the high 64 bits, multiply by 2^32 and add to the low 256 bits
-	add_cc(c[6], high[7], c[6]);
-	addc_cc(c[5], high[6], c[5]);
-	addc_cc(c[4], c[4], 0);
-	addc_cc(c[3], c[3], 0);
-	addc_cc(c[2], c[2], 0);
-	addc_cc(c[1], c[1], 0);
-	addc_cc(c[0], c[0], 0);
-	addc(high[7], 0, 0);
-
-
-	// Take the high 64 bits, multiply by 977 and add to the low 256 bits
-	mad_lo_cc(c[7], high7, s, c[7]);
-	madc_lo_cc(c[6], high6, s, c[6]);
-	addc_cc(c[5], c[5], 0);
-	addc_cc(c[4], c[4], 0);
-	addc_cc(c[3], c[3], 0);
-	addc_cc(c[2], c[2], 0);
-	addc_cc(c[1], c[1], 0);
-	addc_cc(c[0], c[0], 0);
-	addc(high[7], high[7], 0);
-
-	mad_hi_cc(c[6], high7, s, c[6]);
-	madc_hi_cc(c[5], high6, s, c[5]);
-	addc_cc(c[4], c[4], 0);
-	addc_cc(c[3], c[3], 0);
-	addc_cc(c[2], c[2], 0);
-	addc_cc(c[1], c[1], 0);
-	addc_cc(c[0], c[0], 0);
-	addc(high[7], high[7], 0);
-
-
-	bool overflow = high[7] != 0;
-
-	unsigned int borrow = sub(c, _P, c);
-
-	if(overflow) {
-		if(!borrow) {
-			sub(c, _P, c);
-		}
-	} else {
-		if(borrow) {
-			add(c, _P, c);
-		}
-	}
-}
-
-
-/**
- * Square mod P
- * b = a * a
- */
-__device__ static void squareModP(const unsigned int a[8], unsigned int b[8])
-{
-	mulModP(a, a, b);
-}
-
-/**
- * Square mod P
- * x = x * x
- */
-__device__ static void squareModP(unsigned int x[8])
-{
-	unsigned int tmp[8];
-	squareModP(x, tmp);
-	copyBigInt(tmp, x);
-}
-
-/**
- * Multiply mod P
- * c = a * c
- */
-__device__ static void mulModP(const unsigned int a[8], unsigned int c[8])
-{
-	unsigned int tmp[8];
-	mulModP(a, c, tmp);
-
-	copyBigInt(tmp, c);
-}
-
-/**
- * Multiplicative inverse mod P using Fermat's method of x^(p-2) mod p and addition chains
- */
-__device__ static void invModP(unsigned int value[8])
-{
-	unsigned int x[8];
-
-	copyBigInt(value, x);
-
-	unsigned int y[8] = { 0, 0, 0, 0, 0, 0, 0, 1 };
-
-	// 0xd - 1101
-	mulModP(x, y);
-	squareModP(x);
-	//mulModP(x, y);
-	squareModP(x);
-	mulModP(x, y);
-	squareModP(x);
-	mulModP(x, y);
-	squareModP(x);
-
-
-	// 0x2 - 0010
-	//mulModP(x, y);
-	squareModP(x);
-	mulModP(x, y);
-	squareModP(x);
-	//mulModP(x, y);
-	squareModP(x);
-	//mulModP(x, y);
-	squareModP(x);
-
-	// 0xc = 0x1100
-	//mulModP(x, y);
-	squareModP(x);
-	//mulModP(x, y);
-	squareModP(x);
-	mulModP(x, y);
-	squareModP(x);
-	mulModP(x, y);
-	squareModP(x);
-
-	// 0xfffff
-	for(int i = 0; i < 20; i++) {
-		mulModP(x, y);
-		squareModP(x);
-	}
-
-	// 0xe - 1110
-	//mulModP(x, y);
-	squareModP(x);
-	mulModP(x, y);
-	squareModP(x);
-	mulModP(x, y);
-	squareModP(x);
-	mulModP(x, y);
-	squareModP(x);
-
-	// 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffff
-	for(int i = 0; i < 219; i++) {
-		mulModP(x, y);
-		squareModP(x);
-	}
-	mulModP(x, y);
-
-	copyBigInt(y, value);
-}
-
-__device__ static void invModP(const unsigned int *value, unsigned int *inverse)
-{
-	copyBigInt(value, inverse);
-
-	invModP(inverse);
-}
-
-__device__ static void negModP(const unsigned int *value, unsigned int *negative)
-{
-	sub_cc(negative[0], _P[0], value[0]);
-	subc_cc(negative[1], _P[1], value[1]);
-	subc_cc(negative[2], _P[2], value[2]);
-	subc_cc(negative[3], _P[3], value[3]);
-	subc_cc(negative[4], _P[4], value[4]);
-	subc_cc(negative[5], _P[5], value[5]);
-	subc_cc(negative[6], _P[6], value[6]);
-	subc(negative[7], _P[7], value[7]);
-}
-
-
-__device__ __forceinline__ static void beginBatchAdd(const unsigned int *px, const unsigned int *x, unsigned int *chain, int i, int batchIdx, unsigned int inverse[8])
-{
-	// x = Gx - x
-	unsigned int t[8];
-	subModP(px, x, t);
-
-	// Keep a chain of multiples of the diff, i.e. c[0] = diff0, c[1] = diff0 * diff1,
-	// c[2] = diff2 * diff1 * diff0, etc
-	mulModP(t, inverse);
-
-	writeInt(chain, batchIdx, inverse);
-}
-
-
-__device__ __forceinline__ static void beginBatchAddWithDouble(const unsigned int *px, const unsigned int *py, unsigned int *xPtr, unsigned int *chain, int i, int batchIdx, unsigned int inverse[8])
-{
-	unsigned int x[8];
-	readInt(xPtr, i, x);
-
-	if(equal(px, x)) {
-		addModP(py, py, x);
-	} else {
-		// x = Gx - x
-		subModP(px, x, x);
-	}
-
-	// Keep a chain of multiples of the diff, i.e. c[0] = diff0, c[1] = diff0 * diff1,
-	// c[2] = diff2 * diff1 * diff0, etc
-	mulModP(x, inverse);
-
-	writeInt(chain, batchIdx, inverse);
-}
-
-__device__ static void completeBatchAddWithDouble(const unsigned int *px, const unsigned int *py, const unsigned int *xPtr, const unsigned int *yPtr, int i, int batchIdx, unsigned int *chain, unsigned int *inverse, unsigned int newX[8], unsigned int newY[8])
-{
-	unsigned int s[8];
-	unsigned int x[8];
-	unsigned int y[8];
-
-	readInt(xPtr, i, x);
-	readInt(yPtr, i, y);
-
-	if(batchIdx >= 1) {
-		unsigned int c[8];
-
-		readInt(chain, batchIdx - 1, c);
-
-		mulModP(inverse, c, s);
-
-		unsigned int diff[8];
-		if(equal(px, x)) {
-			addModP(py, py, diff);
-		} else {
-			subModP(px, x, diff);
-		}
-
-		mulModP(diff, inverse);
-	} else {
-		copyBigInt(inverse, s);
-	}
-
-
-	if(equal(px, x)) {
-		// currently s = 1 / 2y
-
-		unsigned int x2[8];
-		unsigned int tx2[8];
-
-		// 3x^2
-		mulModP(x, x, x2);
-		addModP(x2, x2, tx2);
-		addModP(x2, tx2, tx2);
-
-
-		// s = 3x^2 * 1/2y
-		mulModP(tx2, s);
-
-		// s^2
-		unsigned int s2[8];
-		mulModP(s, s, s2);
-
-		// Rx = s^2 - 2px
-		subModP(s2, x, newX);
-		subModP(newX, x, newX);
-
-		// Ry = s(px - rx) - py
-		unsigned int k[8];
-		subModP(px, newX, k);
-		mulModP(s, k, newY);
-		subModP(newY, py, newY);
-
-	} else {
-
-		unsigned int rise[8];
-		subModP(py, y, rise);
-
-		mulModP(rise, s);
-
-		// Rx = s^2 - Gx - Qx
-		unsigned int s2[8];
-		mulModP(s, s, s2);
-
-		subModP(s2, px, newX);
-		subModP(newX, x, newX);
-
-		// Ry = s(px - rx) - py
-		unsigned int k[8];
-		subModP(px, newX, k);
-		mulModP(s, k, newY);
-		subModP(newY, py, newY);
-	}
-}
-
-__device__ static void completeBatchAdd(const unsigned int *px, const unsigned int *py, unsigned int *xPtr, unsigned int *yPtr, int i, int batchIdx, unsigned int *chain, unsigned int *inverse, unsigned int newX[8], unsigned int newY[8])
-{
-	unsigned int s[8];
-	unsigned int x[8];
-
-	readInt(xPtr, i, x);
-
-	if(batchIdx >= 1) {
-		unsigned int c[8];
-
-		readInt(chain, batchIdx - 1, c);
-		mulModP(inverse, c, s);
-
-		unsigned int diff[8];
-		subModP(px, x, diff);
-		mulModP(diff, inverse);
-	} else {
-		copyBigInt(inverse, s);
-	}
-
-	unsigned int y[8];
-	readInt(yPtr, i, y);
-
-	unsigned int rise[8];
-	subModP(py, y, rise);
-
-	mulModP(rise, s);
-
-	// Rx = s^2 - Gx - Qx
-	unsigned int s2[8];
-	mulModP(s, s, s2);
-	subModP(s2, px, newX);
-	subModP(newX, x, newX);
-
-	// Ry = s(px - rx) - py
-	unsigned int k[8];
-	subModP(px, newX, k);
-	mulModP(s, k, newY);
-	subModP(newY, py, newY);
-}
-
-
-__device__ __forceinline__ static void doBatchInverse(unsigned int inverse[8])
-{
-	invModP(inverse);
-}
-
-#endif
\ No newline at end of file
diff --git a/cudaMath/sha256.cuh b/cudaMath/sha256.cuh
deleted file mode 100644
index b04aa9a..0000000
--- a/cudaMath/sha256.cuh
+++ /dev/null
@@ -1,545 +0,0 @@
-#ifndef _SHA256_CUH
-#define _SHA256_CUH
-
-#include<cuda.h>
-#include<cuda_runtime.h>
-
-#include<stdio.h>
-
-
-__constant__ unsigned int _K[64] = {
-	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
-	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
-	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
-	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
-	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
-	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
-	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
-	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
-};
-
-__constant__ unsigned int _IV[8] = {
-	0x6a09e667,
-	0xbb67ae85,
-	0x3c6ef372,
-	0xa54ff53a,
-	0x510e527f,
-	0x9b05688c,
-	0x1f83d9ab,
-	0x5be0cd19
-};
-
-
-__device__ __forceinline__ unsigned int rotr(unsigned int x, int n)
-{
-	return (x >> n) ^ (x << (32 - n));
-}
-
-__device__ __forceinline__ unsigned int MAJ(unsigned int a, unsigned int b, unsigned int c)
-{
-	return (a & b) ^ (a & c) ^ (b & c);
-}
-
-__device__ __forceinline__ unsigned int CH(unsigned int e, unsigned int f, unsigned int g)
-{
-	return (e & f) ^ (~e & g);
-}
-
-__device__ __forceinline__ unsigned int s0(unsigned int x)
-{
-	return rotr(x, 7) ^ rotr(x, 18) ^ (x >> 3);
-}
-
-__device__ __forceinline__ unsigned int s1(unsigned int x)
-{
-	return rotr(x, 17) ^ rotr(x, 19) ^ (x >> 10);
-}
-
-
-__device__ __forceinline__ void round(unsigned int a, unsigned int b, unsigned int c, unsigned int &d, unsigned e, unsigned int f, unsigned int g, unsigned int &h, unsigned int m, unsigned int k)
-{
-	unsigned int s = CH(e, f, g) + (rotr(e, 6) ^ rotr(e, 11) ^ rotr(e, 25)) + k + m;
-
-	d += s + h;
-
-	h += s + MAJ(a, b, c) + (rotr(a, 2) ^ rotr(a, 13) ^ rotr(a, 22));
-}
-
-__device__ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned int digest[8])
-{
-	unsigned int a, b, c, d, e, f, g, h;
-	unsigned int w[16];
-
-	// 0x04 || x || y
-	w[0] = (x[0] >> 8) | 0x04000000;
-	w[1] = (x[1] >> 8) | (x[0] << 24);
-	w[2] = (x[2] >> 8) | (x[1] << 24);
-	w[3] = (x[3] >> 8) | (x[2] << 24);
-	w[4] = (x[4] >> 8) | (x[3] << 24);
-	w[5] = (x[5] >> 8) | (x[4] << 24);
-	w[6] = (x[6] >> 8) | (x[5] << 24);
-	w[7] = (x[7] >> 8) | (x[6] << 24);
-	w[8] = (y[0] >> 8) | (x[7] << 24);
-	w[9] = (y[1] >> 8) | (y[0] << 24);
-	w[10] = (y[2] >> 8) | (y[1] << 24);
-	w[11] = (y[3] >> 8) | (y[2] << 24);
-	w[12] = (y[4] >> 8) | (y[3] << 24);
-	w[13] = (y[5] >> 8) | (y[4] << 24);
-	w[14] = (y[6] >> 8) | (y[5] << 24);
-	w[15] = (y[7] >> 8) | (y[6] << 24);
-
-	a = _IV[0];
-	b = _IV[1];
-	c = _IV[2];
-	d = _IV[3];
-	e = _IV[4];
-	f = _IV[5];
-	g = _IV[6];
-	h = _IV[7];
-
-
-	round(a, b, c, d, e, f, g, h, w[0], _K[0]);
-	round(h, a, b, c, d, e, f, g, w[1], _K[1]);
-	round(g, h, a, b, c, d, e, f, w[2], _K[2]);
-	round(f, g, h, a, b, c, d, e, w[3], _K[3]);
-	round(e, f, g, h, a, b, c, d, w[4], _K[4]);
-	round(d, e, f, g, h, a, b, c, w[5], _K[5]);
-	round(c, d, e, f, g, h, a, b, w[6], _K[6]);
-	round(b, c, d, e, f, g, h, a, w[7], _K[7]);
-	round(a, b, c, d, e, f, g, h, w[8], _K[8]);
-	round(h, a, b, c, d, e, f, g, w[9], _K[9]);
-	round(g, h, a, b, c, d, e, f, w[10], _K[10]);
-	round(f, g, h, a, b, c, d, e, w[11], _K[11]);
-	round(e, f, g, h, a, b, c, d, w[12], _K[12]);
-	round(d, e, f, g, h, a, b, c, w[13], _K[13]);
-	round(c, d, e, f, g, h, a, b, w[14], _K[14]);
-	round(b, c, d, e, f, g, h, a, w[15], _K[15]);
-
-
-
-	w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
-	w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]);
-	w[2] = w[2] + s0(w[3]) + w[11] + s1(w[0]);
-	w[3] = w[3] + s0(w[4]) + w[12] + s1(w[1]);
-	w[4] = w[4] + s0(w[5]) + w[13] + s1(w[2]);
-	w[5] = w[5] + s0(w[6]) + w[14] + s1(w[3]);
-	w[6] = w[6] + s0(w[7]) + w[15] + s1(w[4]);
-	w[7] = w[7] + s0(w[8]) + w[0] + s1(w[5]);
-	w[8] = w[8] + s0(w[9]) + w[1] + s1(w[6]);
-	w[9] = w[9] + s0(w[10]) + w[2] + s1(w[7]);
-	w[10] = w[10] + s0(w[11]) + w[3] + s1(w[8]);
-	w[11] = w[11] + s0(w[12]) + w[4] + s1(w[9]);
-	w[12] = w[12] + s0(w[13]) + w[5] + s1(w[10]);
-	w[13] = w[13] + s0(w[14]) + w[6] + s1(w[11]);
-	w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
-	w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
-
-	round(a, b, c, d, e, f, g, h, w[0], _K[16]);
-	round(h, a, b, c, d, e, f, g, w[1], _K[17]);
-	round(g, h, a, b, c, d, e, f, w[2], _K[18]);
-	round(f, g, h, a, b, c, d, e, w[3], _K[19]);
-	round(e, f, g, h, a, b, c, d, w[4], _K[20]);
-	round(d, e, f, g, h, a, b, c, w[5], _K[21]);
-	round(c, d, e, f, g, h, a, b, w[6], _K[22]);
-	round(b, c, d, e, f, g, h, a, w[7], _K[23]);
-	round(a, b, c, d, e, f, g, h, w[8], _K[24]);
-	round(h, a, b, c, d, e, f, g, w[9], _K[25]);
-	round(g, h, a, b, c, d, e, f, w[10], _K[26]);
-	round(f, g, h, a, b, c, d, e, w[11], _K[27]);
-	round(e, f, g, h, a, b, c, d, w[12], _K[28]);
-	round(d, e, f, g, h, a, b, c, w[13], _K[29]);
-	round(c, d, e, f, g, h, a, b, w[14], _K[30]);
-	round(b, c, d, e, f, g, h, a, w[15], _K[31]);
-
-
-	w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
-	w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]);
-	w[2] = w[2] + s0(w[3]) + w[11] + s1(w[0]);
-	w[3] = w[3] + s0(w[4]) + w[12] + s1(w[1]);
-	w[4] = w[4] + s0(w[5]) + w[13] + s1(w[2]);
-	w[5] = w[5] + s0(w[6]) + w[14] + s1(w[3]);
-	w[6] = w[6] + s0(w[7]) + w[15] + s1(w[4]);
-	w[7] = w[7] + s0(w[8]) + w[0] + s1(w[5]);
-	w[8] = w[8] + s0(w[9]) + w[1] + s1(w[6]);
-	w[9] = w[9] + s0(w[10]) + w[2] + s1(w[7]);
-	w[10] = w[10] + s0(w[11]) + w[3] + s1(w[8]);
-	w[11] = w[11] + s0(w[12]) + w[4] + s1(w[9]);
-	w[12] = w[12] + s0(w[13]) + w[5] + s1(w[10]);
-	w[13] = w[13] + s0(w[14]) + w[6] + s1(w[11]);
-	w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
-	w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
-
-	round(a, b, c, d, e, f, g, h, w[0], _K[32]);
-	round(h, a, b, c, d, e, f, g, w[1], _K[33]);
-	round(g, h, a, b, c, d, e, f, w[2], _K[34]);
-	round(f, g, h, a, b, c, d, e, w[3], _K[35]);
-	round(e, f, g, h, a, b, c, d, w[4], _K[36]);
-	round(d, e, f, g, h, a, b, c, w[5], _K[37]);
-	round(c, d, e, f, g, h, a, b, w[6], _K[38]);
-	round(b, c, d, e, f, g, h, a, w[7], _K[39]);
-	round(a, b, c, d, e, f, g, h, w[8], _K[40]);
-	round(h, a, b, c, d, e, f, g, w[9], _K[41]);
-	round(g, h, a, b, c, d, e, f, w[10], _K[42]);
-	round(f, g, h, a, b, c, d, e, w[11], _K[43]);
-	round(e, f, g, h, a, b, c, d, w[12], _K[44]);
-	round(d, e, f, g, h, a, b, c, w[13], _K[45]);
-	round(c, d, e, f, g, h, a, b, w[14], _K[46]);
-	round(b, c, d, e, f, g, h, a, w[15], _K[47]);
-
-
-
-
-	w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
-	w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]);
-	w[2] = w[2] + s0(w[3]) + w[11] + s1(w[0]);
-	w[3] = w[3] + s0(w[4]) + w[12] + s1(w[1]);
-	w[4] = w[4] + s0(w[5]) + w[13] + s1(w[2]);
-	w[5] = w[5] + s0(w[6]) + w[14] + s1(w[3]);
-	w[6] = w[6] + s0(w[7]) + w[15] + s1(w[4]);
-	w[7] = w[7] + s0(w[8]) + w[0] + s1(w[5]);
-	w[8] = w[8] + s0(w[9]) + w[1] + s1(w[6]);
-	w[9] = w[9] + s0(w[10]) + w[2] + s1(w[7]);
-	w[10] = w[10] + s0(w[11]) + w[3] + s1(w[8]);
-	w[11] = w[11] + s0(w[12]) + w[4] + s1(w[9]);
-	w[12] = w[12] + s0(w[13]) + w[5] + s1(w[10]);
-	w[13] = w[13] + s0(w[14]) + w[6] + s1(w[11]);
-	w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
-	w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
-
-	round(a, b, c, d, e, f, g, h, w[0], _K[48]);
-	round(h, a, b, c, d, e, f, g, w[1], _K[49]);
-	round(g, h, a, b, c, d, e, f, w[2], _K[50]);
-	round(f, g, h, a, b, c, d, e, w[3], _K[51]);
-	round(e, f, g, h, a, b, c, d, w[4], _K[52]);
-	round(d, e, f, g, h, a, b, c, w[5], _K[53]);
-	round(c, d, e, f, g, h, a, b, w[6], _K[54]);
-	round(b, c, d, e, f, g, h, a, w[7], _K[55]);
-	round(a, b, c, d, e, f, g, h, w[8], _K[56]);
-	round(h, a, b, c, d, e, f, g, w[9], _K[57]);
-	round(g, h, a, b, c, d, e, f, w[10], _K[58]);
-	round(f, g, h, a, b, c, d, e, w[11], _K[59]);
-	round(e, f, g, h, a, b, c, d, w[12], _K[60]);
-	round(d, e, f, g, h, a, b, c, w[13], _K[61]);
-	round(c, d, e, f, g, h, a, b, w[14], _K[62]);
-	round(b, c, d, e, f, g, h, a, w[15], _K[63]);
-
-	a += _IV[0];
-	b += _IV[1];
-	c += _IV[2];
-	d += _IV[3];
-	e += _IV[4];
-	f += _IV[5];
-	g += _IV[6];
-	h += _IV[7];
-
-	// store the intermediate hash value
-	unsigned int tmp[8];
-	tmp[0] = a;
-	tmp[1] = b;
-	tmp[2] = c;
-	tmp[3] = d;
-	tmp[4] = e;
-	tmp[5] = f;
-	tmp[6] = g;
-	tmp[7] = h;
-
-	w[0] = (y[7] << 24) | 0x00800000;
-	w[15] = 65 * 8;
-
-	round(a, b, c, d, e, f, g, h, w[0], _K[0]);
-	round(h, a, b, c, d, e, f, g, 0, _K[1]);
-	round(g, h, a, b, c, d, e, f, 0, _K[2]);
-	round(f, g, h, a, b, c, d, e, 0, _K[3]);
-	round(e, f, g, h, a, b, c, d, 0, _K[4]);
-	round(d, e, f, g, h, a, b, c, 0, _K[5]);
-	round(c, d, e, f, g, h, a, b, 0, _K[6]);
-	round(b, c, d, e, f, g, h, a, 0, _K[7]);
-	round(a, b, c, d, e, f, g, h, 0, _K[8]);
-	round(h, a, b, c, d, e, f, g, 0, _K[9]);
-	round(g, h, a, b, c, d, e, f, 0, _K[10]);
-	round(f, g, h, a, b, c, d, e, 0, _K[11]);
-	round(e, f, g, h, a, b, c, d, 0, _K[12]);
-	round(d, e, f, g, h, a, b, c, 0, _K[13]);
-	round(c, d, e, f, g, h, a, b, 0, _K[14]);
-	round(b, c, d, e, f, g, h, a, w[15], _K[15]);
-
-	w[0] = w[0] + s0(0) + 0 + s1(0);
-	w[1] = 0 + s0(0) + 0 + s1(w[15]);
-	w[2] = 0 + s0(0) + 0 + s1(w[0]);
-	w[3] = 0 + s0(0) + 0 + s1(w[1]);
-	w[4] = 0 + s0(0) + 0 + s1(w[2]);
-	w[5] = 0 + s0(0) + 0 + s1(w[3]);
-	w[6] = 0 + s0(0) + w[15] + s1(w[4]);
-	w[7] = 0 + s0(0) + w[0] + s1(w[5]);
-	w[8] = 0 + s0(0) + w[1] + s1(w[6]);
-	w[9] = 0 + s0(0) + w[2] + s1(w[7]);
-	w[10] = 0 + s0(0) + w[3] + s1(w[8]);
-	w[11] = 0 + s0(0) + w[4] + s1(w[9]);
-	w[12] = 0 + s0(0) + w[5] + s1(w[10]);
-	w[13] = 0 + s0(0) + w[6] + s1(w[11]);
-	w[14] = 0 + s0(w[15]) + w[7] + s1(w[12]);
-	w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
-	
-
-
-	round(a, b, c, d, e, f, g, h, w[0], _K[16]);
-	round(h, a, b, c, d, e, f, g, w[1], _K[17]);
-	round(g, h, a, b, c, d, e, f, w[2], _K[18]);
-	round(f, g, h, a, b, c, d, e, w[3], _K[19]);
-	round(e, f, g, h, a, b, c, d, w[4], _K[20]);
-	round(d, e, f, g, h, a, b, c, w[5], _K[21]);
-	round(c, d, e, f, g, h, a, b, w[6], _K[22]);
-	round(b, c, d, e, f, g, h, a, w[7], _K[23]);
-	round(a, b, c, d, e, f, g, h, w[8], _K[24]);
-	round(h, a, b, c, d, e, f, g, w[9], _K[25]);
-	round(g, h, a, b, c, d, e, f, w[10], _K[26]);
-	round(f, g, h, a, b, c, d, e, w[11], _K[27]);
-	round(e, f, g, h, a, b, c, d, w[12], _K[28]);
-	round(d, e, f, g, h, a, b, c, w[13], _K[29]);
-	round(c, d, e, f, g, h, a, b, w[14], _K[30]);
-	round(b, c, d, e, f, g, h, a, w[15], _K[31]);
-
-	w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
-	w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]);
-	w[2] = w[2] + s0(w[3]) + w[11] + s1(w[0]);
-	w[3] = w[3] + s0(w[4]) + w[12] + s1(w[1]);
-	w[4] = w[4] + s0(w[5]) + w[13] + s1(w[2]);
-	w[5] = w[5] + s0(w[6]) + w[14] + s1(w[3]);
-	w[6] = w[6] + s0(w[7]) + w[15] + s1(w[4]);
-	w[7] = w[7] + s0(w[8]) + w[0] + s1(w[5]);
-	w[8] = w[8] + s0(w[9]) + w[1] + s1(w[6]);
-	w[9] = w[9] + s0(w[10]) + w[2] + s1(w[7]);
-	w[10] = w[10] + s0(w[11]) + w[3] + s1(w[8]);
-	w[11] = w[11] + s0(w[12]) + w[4] + s1(w[9]);
-	w[12] = w[12] + s0(w[13]) + w[5] + s1(w[10]);
-	w[13] = w[13] + s0(w[14]) + w[6] + s1(w[11]);
-	w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
-	w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
-
-	round(a, b, c, d, e, f, g, h, w[0], _K[32]);
-	round(h, a, b, c, d, e, f, g, w[1], _K[33]);
-	round(g, h, a, b, c, d, e, f, w[2], _K[34]);
-	round(f, g, h, a, b, c, d, e, w[3], _K[35]);
-	round(e, f, g, h, a, b, c, d, w[4], _K[36]);
-	round(d, e, f, g, h, a, b, c, w[5], _K[37]);
-	round(c, d, e, f, g, h, a, b, w[6], _K[38]);
-	round(b, c, d, e, f, g, h, a, w[7], _K[39]);
-	round(a, b, c, d, e, f, g, h, w[8], _K[40]);
-	round(h, a, b, c, d, e, f, g, w[9], _K[41]);
-	round(g, h, a, b, c, d, e, f, w[10], _K[42]);
-	round(f, g, h, a, b, c, d, e, w[11], _K[43]);
-	round(e, f, g, h, a, b, c, d, w[12], _K[44]);
-	round(d, e, f, g, h, a, b, c, w[13], _K[45]);
-	round(c, d, e, f, g, h, a, b, w[14], _K[46]);
-	round(b, c, d, e, f, g, h, a, w[15], _K[47]);
-
-	w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
-	w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]);
-	w[2] = w[2] + s0(w[3]) + w[11] + s1(w[0]);
-	w[3] = w[3] + s0(w[4]) + w[12] + s1(w[1]);
-	w[4] = w[4] + s0(w[5]) + w[13] + s1(w[2]);
-	w[5] = w[5] + s0(w[6]) + w[14] + s1(w[3]);
-	w[6] = w[6] + s0(w[7]) + w[15] + s1(w[4]);
-	w[7] = w[7] + s0(w[8]) + w[0] + s1(w[5]);
-	w[8] = w[8] + s0(w[9]) + w[1] + s1(w[6]);
-	w[9] = w[9] + s0(w[10]) + w[2] + s1(w[7]);
-	w[10] = w[10] + s0(w[11]) + w[3] + s1(w[8]);
-	w[11] = w[11] + s0(w[12]) + w[4] + s1(w[9]);
-	w[12] = w[12] + s0(w[13]) + w[5] + s1(w[10]);
-	w[13] = w[13] + s0(w[14]) + w[6] + s1(w[11]);
-	w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
-	w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
-
-	round(a, b, c, d, e, f, g, h, w[0], _K[48]);
-	round(h, a, b, c, d, e, f, g, w[1], _K[49]);
-	round(g, h, a, b, c, d, e, f, w[2], _K[50]);
-	round(f, g, h, a, b, c, d, e, w[3], _K[51]);
-	round(e, f, g, h, a, b, c, d, w[4], _K[52]);
-	round(d, e, f, g, h, a, b, c, w[5], _K[53]);
-	round(c, d, e, f, g, h, a, b, w[6], _K[54]);
-	round(b, c, d, e, f, g, h, a, w[7], _K[55]);
-	round(a, b, c, d, e, f, g, h, w[8], _K[56]);
-	round(h, a, b, c, d, e, f, g, w[9], _K[57]);
-	round(g, h, a, b, c, d, e, f, w[10], _K[58]);
-	round(f, g, h, a, b, c, d, e, w[11], _K[59]);
-	round(e, f, g, h, a, b, c, d, w[12], _K[60]);
-	round(d, e, f, g, h, a, b, c, w[13], _K[61]);
-	round(c, d, e, f, g, h, a, b, w[14], _K[62]);
-	round(b, c, d, e, f, g, h, a, w[15], _K[63]);
-
-	digest[0] = tmp[0] + a;
-	digest[1] = tmp[1] + b;
-	digest[2] = tmp[2] + c;
-	digest[3] = tmp[3] + d;
-	digest[4] = tmp[4] + e;
-	digest[5] = tmp[5] + f;
-	digest[6] = tmp[6] + g;
-	digest[7] = tmp[7] + h;
-}
-
-__device__ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, unsigned int digest[8])
-{
-	unsigned int a, b, c, d, e, f, g, h;
-	unsigned int w[16];
-
-	// 0x03 || x  or  0x02 || x
-	w[0] = 0x02000000 | ((yParity & 1) << 24) | (x[0] >> 8);
-
-	w[1] = (x[1] >> 8) | (x[0] << 24);
-	w[2] = (x[2] >> 8) | (x[1] << 24);
-	w[3] = (x[3] >> 8) | (x[2] << 24);
-	w[4] = (x[4] >> 8) | (x[3] << 24);
-	w[5] = (x[5] >> 8) | (x[4] << 24);
-	w[6] = (x[6] >> 8) | (x[5] << 24);
-	w[7] = (x[7] >> 8) | (x[6] << 24);
-	w[8] = (x[7] << 24) | 0x00800000;
-	w[15] = 33 * 8;
-
-	a = _IV[0];
-	b = _IV[1];
-	c = _IV[2];
-	d = _IV[3];
-	e = _IV[4];
-	f = _IV[5];
-	g = _IV[6];
-	h = _IV[7];
-
-	round(a, b, c, d, e, f, g, h, w[0], _K[0]);
-	round(h, a, b, c, d, e, f, g, w[1], _K[1]);
-	round(g, h, a, b, c, d, e, f, w[2], _K[2]);
-	round(f, g, h, a, b, c, d, e, w[3], _K[3]);
-	round(e, f, g, h, a, b, c, d, w[4], _K[4]);
-	round(d, e, f, g, h, a, b, c, w[5], _K[5]);
-	round(c, d, e, f, g, h, a, b, w[6], _K[6]);
-	round(b, c, d, e, f, g, h, a, w[7], _K[7]);
-	round(a, b, c, d, e, f, g, h, w[8], _K[8]);
-	round(h, a, b, c, d, e, f, g, 0, _K[9]);
-	round(g, h, a, b, c, d, e, f, 0, _K[10]);
-	round(f, g, h, a, b, c, d, e, 0, _K[11]);
-	round(e, f, g, h, a, b, c, d, 0, _K[12]);
-	round(d, e, f, g, h, a, b, c, 0, _K[13]);
-	round(c, d, e, f, g, h, a, b, 0, _K[14]);
-	round(b, c, d, e, f, g, h, a, w[15], _K[15]);
-
-	w[0] = w[0] + s0(w[1]) + 0 + s1(0);
-	w[1] = w[1] + s0(w[2]) + 0 + s1(w[15]);
-	w[2] = w[2] + s0(w[3]) + 0 + s1(w[0]);
-	w[3] = w[3] + s0(w[4]) + 0 + s1(w[1]);
-	w[4] = w[4] + s0(w[5]) + 0 + s1(w[2]);
-	w[5] = w[5] + s0(w[6]) + 0 + s1(w[3]);
-	w[6] = w[6] + s0(w[7]) + w[15] + s1(w[4]);
-	w[7] = w[7] + s0(w[8]) + w[0] + s1(w[5]);
-	w[8] = w[8] + s0(0) + w[1] + s1(w[6]);
-	w[9] = 0 + s0(0) + w[2] + s1(w[7]);
-	w[10] = 0 + s0(0) + w[3] + s1(w[8]);
-	w[11] = 0 + s0(0) + w[4] + s1(w[9]);
-	w[12] = 0 + s0(0) + w[5] + s1(w[10]);
-	w[13] = 0 + s0(0) + w[6] + s1(w[11]);
-	w[14] = 0 + s0(w[15]) + w[7] + s1(w[12]);
-	w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
-
-	round(a, b, c, d, e, f, g, h, w[0], _K[16]);
-	round(h, a, b, c, d, e, f, g, w[1], _K[17]);
-	round(g, h, a, b, c, d, e, f, w[2], _K[18]);
-	round(f, g, h, a, b, c, d, e, w[3], _K[19]);
-	round(e, f, g, h, a, b, c, d, w[4], _K[20]);
-	round(d, e, f, g, h, a, b, c, w[5], _K[21]);
-	round(c, d, e, f, g, h, a, b, w[6], _K[22]);
-	round(b, c, d, e, f, g, h, a, w[7], _K[23]);
-	round(a, b, c, d, e, f, g, h, w[8], _K[24]);
-	round(h, a, b, c, d, e, f, g, w[9], _K[25]);
-	round(g, h, a, b, c, d, e, f, w[10], _K[26]);
-	round(f, g, h, a, b, c, d, e, w[11], _K[27]);
-	round(e, f, g, h, a, b, c, d, w[12], _K[28]);
-	round(d, e, f, g, h, a, b, c, w[13], _K[29]);
-	round(c, d, e, f, g, h, a, b, w[14], _K[30]);
-	round(b, c, d, e, f, g, h, a, w[15], _K[31]);
-
-	w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
-	w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]);
-	w[2] = w[2] + s0(w[3]) + w[11] + s1(w[0]);
-	w[3] = w[3] + s0(w[4]) + w[12] + s1(w[1]);
-	w[4] = w[4] + s0(w[5]) + w[13] + s1(w[2]);
-	w[5] = w[5] + s0(w[6]) + w[14] + s1(w[3]);
-	w[6] = w[6] + s0(w[7]) + w[15] + s1(w[4]);
-	w[7] = w[7] + s0(w[8]) + w[0] + s1(w[5]);
-	w[8] = w[8] + s0(w[9]) + w[1] + s1(w[6]);
-	w[9] = w[9] + s0(w[10]) + w[2] + s1(w[7]);
-	w[10] = w[10] + s0(w[11]) + w[3] + s1(w[8]);
-	w[11] = w[11] + s0(w[12]) + w[4] + s1(w[9]);
-	w[12] = w[12] + s0(w[13]) + w[5] + s1(w[10]);
-	w[13] = w[13] + s0(w[14]) + w[6] + s1(w[11]);
-	w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
-	w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
-
-	round(a, b, c, d, e, f, g, h, w[0], _K[32]);
-	round(h, a, b, c, d, e, f, g, w[1], _K[33]);
-	round(g, h, a, b, c, d, e, f, w[2], _K[34]);
-	round(f, g, h, a, b, c, d, e, w[3], _K[35]);
-	round(e, f, g, h, a, b, c, d, w[4], _K[36]);
-	round(d, e, f, g, h, a, b, c, w[5], _K[37]);
-	round(c, d, e, f, g, h, a, b, w[6], _K[38]);
-	round(b, c, d, e, f, g, h, a, w[7], _K[39]);
-	round(a, b, c, d, e, f, g, h, w[8], _K[40]);
-	round(h, a, b, c, d, e, f, g, w[9], _K[41]);
-	round(g, h, a, b, c, d, e, f, w[10], _K[42]);
-	round(f, g, h, a, b, c, d, e, w[11], _K[43]);
-	round(e, f, g, h, a, b, c, d, w[12], _K[44]);
-	round(d, e, f, g, h, a, b, c, w[13], _K[45]);
-	round(c, d, e, f, g, h, a, b, w[14], _K[46]);
-	round(b, c, d, e, f, g, h, a, w[15], _K[47]);
-
-
-	w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
-	w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]);
-	w[2] = w[2] + s0(w[3]) + w[11] + s1(w[0]);
-	w[3] = w[3] + s0(w[4]) + w[12] + s1(w[1]);
-	w[4] = w[4] + s0(w[5]) + w[13] + s1(w[2]);
-	w[5] = w[5] + s0(w[6]) + w[14] + s1(w[3]);
-	w[6] = w[6] + s0(w[7]) + w[15] + s1(w[4]);
-	w[7] = w[7] + s0(w[8]) + w[0] + s1(w[5]);
-	w[8] = w[8] + s0(w[9]) + w[1] + s1(w[6]);
-	w[9] = w[9] + s0(w[10]) + w[2] + s1(w[7]);
-	w[10] = w[10] + s0(w[11]) + w[3] + s1(w[8]);
-	w[11] = w[11] + s0(w[12]) + w[4] + s1(w[9]);
-	w[12] = w[12] + s0(w[13]) + w[5] + s1(w[10]);
-	w[13] = w[13] + s0(w[14]) + w[6] + s1(w[11]);
-	w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
-	w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
-
-	round(a, b, c, d, e, f, g, h, w[0], _K[48]);
-	round(h, a, b, c, d, e, f, g, w[1], _K[49]);
-	round(g, h, a, b, c, d, e, f, w[2], _K[50]);
-	round(f, g, h, a, b, c, d, e, w[3], _K[51]);
-	round(e, f, g, h, a, b, c, d, w[4], _K[52]);
-	round(d, e, f, g, h, a, b, c, w[5], _K[53]);
-	round(c, d, e, f, g, h, a, b, w[6], _K[54]);
-	round(b, c, d, e, f, g, h, a, w[7], _K[55]);
-	round(a, b, c, d, e, f, g, h, w[8], _K[56]);
-	round(h, a, b, c, d, e, f, g, w[9], _K[57]);
-	round(g, h, a, b, c, d, e, f, w[10], _K[58]);
-	round(f, g, h, a, b, c, d, e, w[11], _K[59]);
-	round(e, f, g, h, a, b, c, d, w[12], _K[60]);
-	round(d, e, f, g, h, a, b, c, w[13], _K[61]);
-	round(c, d, e, f, g, h, a, b, w[14], _K[62]);
-	round(b, c, d, e, f, g, h, a, w[15], _K[63]);
-
-	a += _IV[0];
-	b += _IV[1];
-	c += _IV[2];
-	d += _IV[3];
-	e += _IV[4];
-	f += _IV[5];
-	g += _IV[6];
-	h += _IV[7];
-
-	digest[0] = a;
-	digest[1] = b;
-	digest[2] = c;
-	digest[3] = d;
-	digest[4] = e;
-	digest[5] = f;
-	digest[6] = g;
-	digest[7] = h;
-}
-#endif
\ No newline at end of file
diff --git a/cudaUtil/Makefile b/cudaUtil/Makefile
deleted file mode 100644
index f443b14..0000000
--- a/cudaUtil/Makefile
+++ /dev/null
@@ -1,13 +0,0 @@
-NAME=cudautil
-SRC=$(wildcard *.cpp)
-OBJS=$(SRC:.cpp=.o)
-
-all:    ${SRC}
-	for file in ${SRC} ; do\
-		${CXX} -c $$file ${INCLUDE} -I${CUDA_INCLUDE} ${CXXFLAGS};\
-	done
-	mkdir -p ${LIBDIR}
-	ar rvs ${LIBDIR}/lib$(NAME).a ${OBJS}
-
-clean:
-	rm -rf *.o
diff --git a/cudaUtil/cudaUtil.cpp b/cudaUtil/cudaUtil.cpp
deleted file mode 100644
index 349a604..0000000
--- a/cudaUtil/cudaUtil.cpp
+++ /dev/null
@@ -1,92 +0,0 @@
-#include "cudaUtil.h"
-
-
-cuda::CudaDeviceInfo cuda::getDeviceInfo(int device)
-{
-	cuda::CudaDeviceInfo devInfo;
-
-	cudaDeviceProp properties;
-	cudaError_t err = cudaSuccess;
-
-	err = cudaSetDevice(device);
-
-	if(err) {
-		throw cuda::CudaException(err);
-	}
-
-	err = cudaGetDeviceProperties(&properties, device);
-	
-	if(err) {
-		throw cuda::CudaException(err);
-	}
-
-	devInfo.id = device;
-	devInfo.major = properties.major;
-	devInfo.minor = properties.minor;
-	devInfo.mpCount = properties.multiProcessorCount;
-	devInfo.mem = properties.totalGlobalMem;
-	devInfo.name = std::string(properties.name);
-
-	int cores = 0;
-	switch(devInfo.major) {
-	case 1:
-		cores = 8;
-		break;
-	case 2:
-        if(devInfo.minor == 0) {
-            cores = 32;
-        } else {
-            cores = 48;
-        }
-		break;
-	case 3:
-		cores = 192;
-		break;
-	case 5:
-		cores = 128;
-		break;
-	case 6:
-        if(devInfo.minor == 1 || devInfo.minor == 2) {
-            cores = 128;
-        } else {
-            cores = 64;
-        }
-        break;
-	case 7:
-		cores = 64;
-		break;
-    default:
-        cores = 8;
-        break;
-	}
-	devInfo.cores = cores;
-
-	return devInfo;
-}
-
-
-std::vector<cuda::CudaDeviceInfo> cuda::getDevices()
-{
-	int count = getDeviceCount();
-
-	std::vector<CudaDeviceInfo> devList;
-
-	for(int device = 0; device < count; device++) {
-		devList.push_back(getDeviceInfo(device));
-	}
-
-	return devList;
-}
-
-int cuda::getDeviceCount()
-{
-	int count = 0;
-
-	cudaError_t err = cudaGetDeviceCount(&count);
-
-    if(err) {
-        throw cuda::CudaException(err);
-    }
-
-	return count;
-}
\ No newline at end of file
diff --git a/cudaUtil/cudaUtil.h b/cudaUtil/cudaUtil.h
deleted file mode 100644
index eaf7eab..0000000
--- a/cudaUtil/cudaUtil.h
+++ /dev/null
@@ -1,42 +0,0 @@
-#ifndef _CUDA_UTIL_H
-#define _CUDA_UTIL_H
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-
-#include <string>
-#include <vector>
-
-namespace cuda {
-	typedef struct {
-
-		int id;
-		int major;
-		int minor;
-		int mpCount;
-		int cores;
-		uint64_t mem;
-		std::string name;
-
-	}CudaDeviceInfo;
-
-	class CudaException
-	{
-	public:
-		cudaError_t error;
-		std::string msg;
-
-		CudaException(cudaError_t err)
-		{
-			this->error = err;
-			this->msg = std::string(cudaGetErrorString(err));
-		}
-	};
-
-	CudaDeviceInfo getDeviceInfo(int device);
-
-	std::vector<CudaDeviceInfo> getDevices();
-
-	int getDeviceCount();
-}
-#endif
\ No newline at end of file
diff --git a/cudaUtil/cudaUtil.vcxproj b/cudaUtil/cudaUtil.vcxproj
deleted file mode 100644
index 24d5ccb..0000000
--- a/cudaUtil/cudaUtil.vcxproj
+++ /dev/null
@@ -1,160 +0,0 @@
-﻿<?xml version="1.0" encoding="utf-8"?>
-<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <ItemGroup Label="ProjectConfigurations">
-    <ProjectConfiguration Include="Debug|Win32">
-      <Configuration>Debug</Configuration>
-      <Platform>Win32</Platform>
-    </ProjectConfiguration>
-    <ProjectConfiguration Include="Release|Win32">
-      <Configuration>Release</Configuration>
-      <Platform>Win32</Platform>
-    </ProjectConfiguration>
-    <ProjectConfiguration Include="Debug|x64">
-      <Configuration>Debug</Configuration>
-      <Platform>x64</Platform>
-    </ProjectConfiguration>
-    <ProjectConfiguration Include="Release|x64">
-      <Configuration>Release</Configuration>
-      <Platform>x64</Platform>
-    </ProjectConfiguration>
-  </ItemGroup>
-  <ItemGroup>
-    <ClInclude Include="cudaUtil.h" />
-  </ItemGroup>
-  <ItemGroup>
-    <ClCompile Include="cudaUtil.cpp" />
-  </ItemGroup>
-  <PropertyGroup Label="Globals">
-    <ProjectGuid>{EADAAA54-E304-4656-8263-E5E688FF323D}</ProjectGuid>
-    <Keyword>Win32Proj</Keyword>
-    <RootNamespace>cudaUtil</RootNamespace>
-    <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
-  </PropertyGroup>
-  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
-    <ConfigurationType>StaticLibrary</ConfigurationType>
-    <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
-    <CharacterSet>Unicode</CharacterSet>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
-    <ConfigurationType>StaticLibrary</ConfigurationType>
-    <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
-    <CharacterSet>Unicode</CharacterSet>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
-    <ConfigurationType>StaticLibrary</ConfigurationType>
-    <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v142</PlatformToolset>
-    <CharacterSet>Unicode</CharacterSet>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
-    <ConfigurationType>StaticLibrary</ConfigurationType>
-    <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v142</PlatformToolset>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
-    <CharacterSet>Unicode</CharacterSet>
-  </PropertyGroup>
-  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
-  <ImportGroup Label="ExtensionSettings">
-  </ImportGroup>
-  <ImportGroup Label="Shared">
-  </ImportGroup>
-  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-    <Import Project="..\BitCrack.props" />
-  </ImportGroup>
-  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-    <Import Project="..\BitCrack.props" />
-  </ImportGroup>
-  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-    <Import Project="..\BitCrack.props" />
-  </ImportGroup>
-  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-    <Import Project="..\BitCrack.props" />
-  </ImportGroup>
-  <PropertyGroup Label="UserMacros" />
-  <PropertyGroup />
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
-    <ClCompile>
-      <PrecompiledHeader>
-      </PrecompiledHeader>
-      <WarningLevel>Level3</WarningLevel>
-      <Optimization>Disabled</Optimization>
-      <PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <AdditionalIncludeDirectories>C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.2\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-    </ClCompile>
-    <Link>
-      <SubSystem>Windows</SubSystem>
-    </Link>
-    <Lib>
-      <AdditionalLibraryDirectories>C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\lib\Win32;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
-      <AdditionalDependencies>cuda.lib;cudart.lib;%(AdditionalDependencies)</AdditionalDependencies>
-    </Lib>
-  </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <ClCompile>
-      <PrecompiledHeader>NotUsing</PrecompiledHeader>
-      <WarningLevel>Level3</WarningLevel>
-      <Optimization>Disabled</Optimization>
-      <PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <AdditionalIncludeDirectories>$(CUDA_INCLUDE);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-    </ClCompile>
-    <Link>
-      <SubSystem>Windows</SubSystem>
-    </Link>
-    <Lib>
-      <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
-      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
-    </Lib>
-  </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
-    <ClCompile>
-      <WarningLevel>Level3</WarningLevel>
-      <PrecompiledHeader>
-      </PrecompiledHeader>
-      <Optimization>MaxSpeed</Optimization>
-      <FunctionLevelLinking>true</FunctionLevelLinking>
-      <IntrinsicFunctions>true</IntrinsicFunctions>
-      <PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <AdditionalIncludeDirectories>C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.2\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-    </ClCompile>
-    <Link>
-      <SubSystem>Windows</SubSystem>
-      <EnableCOMDATFolding>true</EnableCOMDATFolding>
-      <OptimizeReferences>true</OptimizeReferences>
-    </Link>
-    <Lib>
-      <AdditionalLibraryDirectories>C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\lib\Win32;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
-      <AdditionalDependencies>cuda.lib;cudart.lib;%(AdditionalDependencies)</AdditionalDependencies>
-    </Lib>
-  </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
-    <ClCompile>
-      <WarningLevel>Level3</WarningLevel>
-      <PrecompiledHeader>NotUsing</PrecompiledHeader>
-      <Optimization>MaxSpeed</Optimization>
-      <FunctionLevelLinking>true</FunctionLevelLinking>
-      <IntrinsicFunctions>true</IntrinsicFunctions>
-      <PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <AdditionalIncludeDirectories>$(CUDA_INCLUDE);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-    </ClCompile>
-    <Link>
-      <SubSystem>Windows</SubSystem>
-      <EnableCOMDATFolding>true</EnableCOMDATFolding>
-      <OptimizeReferences>true</OptimizeReferences>
-    </Link>
-    <Lib>
-      <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
-      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
-    </Lib>
-  </ItemDefinitionGroup>
-  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
-  <ImportGroup Label="ExtensionTargets">
-  </ImportGroup>
-</Project>
\ No newline at end of file
diff --git a/embedcl/embedcl.vcxproj b/embedcl/embedcl.vcxproj
index 8d2dd7f..3013aec 100644
--- a/embedcl/embedcl.vcxproj
+++ b/embedcl/embedcl.vcxproj
@@ -28,26 +28,26 @@
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
+    <PlatformToolset>ClangCl</PlatformToolset>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
+    <PlatformToolset>ClangCl</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v142</PlatformToolset>
+    <PlatformToolset>ClangCl</PlatformToolset>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v142</PlatformToolset>
+    <PlatformToolset>ClangCL</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
diff --git a/secp256k1lib/secp256k1.cpp b/secp256k1lib/secp256k1.cpp
index 6107ea0..e24fbe3 100644
--- a/secp256k1lib/secp256k1.cpp
+++ b/secp256k1lib/secp256k1.cpp
@@ -1,39 +1,36 @@
-#include<string.h>
-#include<stdio.h>
-#include<stdlib.h>
-#include"CryptoUtil.h"
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
 
+#include "CryptoUtil.h"
 #include "secp256k1.h"
 
-
 using namespace secp256k1;
 
 static uint256 _ONE(1);
 static uint256 _ZERO;
 static crypto::Rng _rng;
 
-static inline void addc(unsigned int a, unsigned int b, unsigned int carryIn, unsigned int &sum, int &carryOut)
+static inline void addc(unsigned int a, unsigned int b, int& carry, unsigned int &sum)
 {
-	uint64_t sum64 = (uint64_t)a + b + carryIn;
+	uint64_t sum64 = (uint64_t)a + b + carry;
 
 	sum = (unsigned int)sum64;
-	carryOut = (int)(sum64 >> 32) & 1;
+	carry = (int)(sum64 >> 32) & 1;
 }
 
 
-static inline void subc(unsigned int a, unsigned int b, unsigned int borrowIn, unsigned int &diff, int &borrowOut)
+static inline void subc(unsigned int a, unsigned int b, int& borrow, unsigned int &diff)
 {
-	uint64_t diff64 = (uint64_t)a - b - borrowIn;
+	uint64_t diff64 = (uint64_t)a - b - borrow;
 
 	diff = (unsigned int)diff64;
-	borrowOut = (int)((diff64 >> 32) & 1);
+	borrow = (int)((diff64 >> 32) & 1);
 }
 
-
-
-static bool lessThanEqualTo(const unsigned int *a, const unsigned int *b, int len)
+static bool lessThanEqualTo(const unsigned int *a, const unsigned int *b)
 {
-	for(int i = len - 1; i >= 0; i--) {
+	for(int i = 7; i >= 0; i--) {
 		if(a[i] < b[i]) {
 			// is greater than
 			return true;
@@ -68,7 +65,7 @@ static int add(const unsigned int *a, const unsigned int *b, unsigned int *c, in
 	int carry = 0;
 
 	for(int i = 0; i < len; i++) {
-		addc(a[i], b[i], carry, c[i], carry);
+		addc(a[i], b[i], carry, c[i]);
 	}
 
 	return carry;
@@ -79,7 +76,7 @@ static int sub(const unsigned int *a, const unsigned int *b, unsigned int *c, in
 	int borrow = 0;
 
 	for(int i = 0; i < len; i++) {
-		subc(a[i], b[i], borrow, c[i], borrow);
+		subc(a[i], b[i], borrow, c[i]);
 	}
 
 	return borrow & 1;
@@ -386,7 +383,7 @@ uint256 secp256k1::invModP(const uint256 &x)
 			}
 		}
 
-		if(lessThanEqualTo(v.v, u.v, 8)) {
+		if(lessThanEqualTo(v.v, u.v)) {
 			sub(u.v, v.v, u.v, 8);
 
 			// x1 = x1 - x2
@@ -613,7 +610,7 @@ uint256 secp256k1::multiplyModN(const uint256 &a, const uint256 &b)
 	return r;
 }
 
-std::string secp256k1::uint256::toString(int base)
+std::string secp256k1::uint256::toString()
 {
 	std::string s = "";
 
@@ -731,17 +728,6 @@ ecpoint secp256k1::multiplyPoint(const uint256 &k, const ecpoint &p)
 	return sum;
 }
 
-uint256 generatePrivateKey()
-{
-	uint256 k;
-
-	for(int i = 0; i < 8; i++) {
-		k.v[i] = ((unsigned int)rand() | ((unsigned int)rand()) << 17);
-	}
-
-	return k;
-}
-
 bool secp256k1::pointExists(const ecpoint &p)
 {
 	uint256 y = multiplyModP(p.y, p.y);
@@ -767,14 +753,15 @@ static void bulkInversionModP(std::vector<uint256> &in)
 
 	uint256 inverse = secp256k1::invModP(total);
 
-	for(int i = (int)in.size() - 1; i >= 0; i--) {
+	for(size_t i = in.size(); i-- > 0;) { // test before decrementing: an empty vector skips the loop instead of wrapping i to SIZE_MAX
 
-		if(i > 0) {
+		if(i != 0) {
 			uint256 newValue = secp256k1::multiplyModP(products[i - 1], inverse);
 			inverse = multiplyModP(inverse, in[i]);
 			in[i] = newValue;
 		} else {
-			in[i] = inverse;
+			in[0] = inverse;
+			break;
 		}
 	}
 }
@@ -792,7 +779,7 @@ void secp256k1::generateKeyPairsBulk(unsigned int count, const ecpoint &basePoin
 
 void secp256k1::generateKeyPairsBulk(const ecpoint &basePoint, std::vector<uint256> &privKeys, std::vector<ecpoint> &pubKeysOut)
 {
-	unsigned int count = (unsigned int)privKeys.size();
+	size_t count = privKeys.size();
 
 	//privKeysOut.clear();
 	pubKeysOut.clear();
@@ -801,12 +788,14 @@ void secp256k1::generateKeyPairsBulk(const ecpoint &basePoint, std::vector<uint2
 	std::vector<ecpoint> table;
 
 	table.push_back(basePoint);
-	for(int i = 1; i < 256; i++) {
+	for(size_t i = 1; i < 256; i++) {
 
 		ecpoint p = doublePoint(table[i-1]);
+#if defined(DEBUG) || defined(_DEBUG) // the MSBuild debug configurations define _DEBUG, not DEBUG
 		if(!pointExists(p)) {
 			throw std::string("Point does not exist!");
 		}
+#endif
 		table.push_back(p);
 	}
 
@@ -860,13 +849,18 @@ void secp256k1::generateKeyPairsBulk(const ecpoint &basePoint, std::vector<uint2
 					uint256 ry = subModP(multiplyModP(s, subModP(pubKeysOut[j].x, rx)), pubKeysOut[j].y);
 
 					ecpoint r(rx, ry);
+#if defined(DEBUG) || defined(_DEBUG) // the MSBuild debug configurations define _DEBUG, not DEBUG
 					if(!pointExists(r)) {
 						throw std::string("Point does not exist");
 					}
+#endif
 					pubKeysOut[j] = r;
 				}
 			}
 		}
+
+		table.clear();
+		table.shrink_to_fit();
 	}
 }
 
@@ -876,11 +870,11 @@ void secp256k1::generateKeyPairsBulk(const ecpoint &basePoint, std::vector<uint2
 secp256k1::ecpoint secp256k1::parsePublicKey(const std::string &pubKeyString)
 {
 	if(pubKeyString.length() != 130) {
-		throw std::string("Invalid public key");
+		throw std::string("Invalid public key. Length of public key is not 130 characters.");
 	}
 
 	if(pubKeyString[0] != '0' || pubKeyString[1] != '4') {
-		throw std::string("Invalid public key");
+		throw std::string("Invalid public key. Expecting uncompressed format.");
 	}
 
 	std::string xString = pubKeyString.substr(2, 64);
@@ -892,7 +886,7 @@ secp256k1::ecpoint secp256k1::parsePublicKey(const std::string &pubKeyString)
 	ecpoint p(x, y);
 
 	if(!pointExists(p)) {
-		throw std::string("Invalid public key");
+		throw std::string("Invalid public key. Point is not on secp256k1-curve.");
 	}
 
 	return p;
diff --git a/secp256k1lib/secp256k1.h b/secp256k1lib/secp256k1.h
index fb20619..0214454 100644
--- a/secp256k1lib/secp256k1.h
+++ b/secp256k1lib/secp256k1.h
@@ -1,5 +1,5 @@
-#ifndef _HOST_SECP256K1_H
-#define _HOST_SECP256K1_H
+#ifndef HOST_SECP256K1_H
+#define HOST_SECP256K1_H
 
 #include<stdio.h>
 #include<stdint.h>
@@ -27,7 +27,7 @@ namespace secp256k1 {
 			std::string t = s;
 
 			// 0x prefix
-			if(t.length() >= 2 && (t[0] == '0' && t[1] == 'x' || t[1] == 'X')) {
+			if(t.length() >= 2 && (t[0] == '0' && (t[1] == 'x' || t[1] == 'X'))) {
 				t = t.substr(2);
 			}
 
@@ -41,7 +41,7 @@ namespace secp256k1 {
 			}
 
 			// Verify only valid hex characters
-			for(int i = 0; i < (int)t.length(); i++) {
+			for(size_t i = 0, tl = t.length(); i < tl; i++) {
 				if(!((t[i] >= 'a' && t[i] <= 'f') || (t[i] >= 'A' && t[i] <= 'F') || (t[i] >= '0' && t[i] <= '9'))) {
 					throw std::string("Incorrect hex formatting");
 				}
@@ -61,7 +61,7 @@ namespace secp256k1 {
 
 			int j = 0;
 			for(int i = len - 8; i >= 0; i-= 8) {
-				std::string sub = t.substr(i, 8);
+				std::string sub = t.substr(static_cast<size_t>(i), 8); // substr takes size_t; avoids a 64->32 narrowing on Win32
 				uint32_t val;
 				if(sscanf(sub.c_str(), "%x", &val) != 1) {
 					throw std::string("Incorrect hex formatting");
@@ -261,12 +261,13 @@ namespace secp256k1 {
 			return (this->v[0] & 1) == 0;
 		}
 
-		std::string toString(int base = 16);
+		std::string toString();
 
         uint64_t toUint64()
         {
             return ((uint64_t)this->v[1] << 32) | v[0];
         }
+
 	};
 
 	const unsigned int _POINT_AT_INFINITY_WORDS[8] = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF };
@@ -290,10 +291,10 @@ namespace secp256k1 {
 			this->y = uint256(_POINT_AT_INFINITY_WORDS);
 		}
 
-		ecpoint(const uint256 &x, const uint256 &y)
+		ecpoint(const uint256 &pX, const uint256 &pY)
 		{
-			this->x = x;
-			this->y = y;
+			this->x = pX;
+			this->y = pY;
 		}
 
 		ecpoint(const ecpoint &p)
@@ -365,7 +366,8 @@ namespace secp256k1 {
 	void generateKeyPairsBulk(unsigned int count, const ecpoint &basePoint, std::vector<uint256> &privKeysOut, std::vector<ecpoint> &pubKeysOut);
 	void generateKeyPairsBulk(const ecpoint &basePoint, std::vector<uint256> &privKeys, std::vector<ecpoint> &pubKeysOut);
 
+	uint256 generatePrivateKey();
 	ecpoint parsePublicKey(const std::string &pubKeyString);
 }
 
-#endif
\ No newline at end of file
+#endif
diff --git a/secp256k1lib/secp256k1lib.vcxproj b/secp256k1lib/secp256k1lib.vcxproj
index d9270a9..21aa7f3 100644
--- a/secp256k1lib/secp256k1lib.vcxproj
+++ b/secp256k1lib/secp256k1lib.vcxproj
@@ -5,6 +5,14 @@
       <Configuration>Debug</Configuration>
       <Platform>Win32</Platform>
     </ProjectConfiguration>
+    <ProjectConfiguration Include="Performance Release|Win32">
+      <Configuration>Performance Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Performance Release|x64">
+      <Configuration>Performance Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
     <ProjectConfiguration Include="Release|Win32">
       <Configuration>Release</Configuration>
       <Platform>Win32</Platform>
@@ -33,32 +41,46 @@
     <ProjectGuid>{BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6}</ProjectGuid>
     <Keyword>Win32Proj</Keyword>
     <RootNamespace>secp256k1lib</RootNamespace>
-    <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
+    <WindowsTargetPlatformVersion>10.0.19041.0</WindowsTargetPlatformVersion>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
+    <PlatformToolset>ClangCl</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>ClangCl</PlatformToolset>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
+    <PlatformToolset>ClangCl</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v142</PlatformToolset>
+    <PlatformToolset>ClangCl</PlatformToolset>
     <CharacterSet>NotSet</CharacterSet>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>ClangCl</PlatformToolset>
+    <CharacterSet>NotSet</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PreferredToolArchitecture>x64</PreferredToolArchitecture>
+  </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v142</PlatformToolset>
+    <PlatformToolset>ClangCL</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>NotSet</CharacterSet>
   </PropertyGroup>
@@ -71,6 +93,10 @@
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
     <Import Project="..\BitCrack.props" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="..\BitCrack.props" />
+  </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
     <Import Project="..\BitCrack.props" />
@@ -79,6 +105,10 @@
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
     <Import Project="..\BitCrack.props" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="..\BitCrack.props" />
+  </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
     <Import Project="..\BitCrack.props" />
@@ -98,10 +128,23 @@
       <SubSystem>Windows</SubSystem>
     </Link>
   </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|Win32'">
+    <ClCompile> <!-- optimized, non-debug settings: this configuration sets UseDebugLibraries=false, so _DEBUG must not be defined -->
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>MaxSpeed</Optimization>
+      <PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>$(SolutionDir)CryptoUtil;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <ClCompile>
       <PrecompiledHeader>NotUsing</PrecompiledHeader>
-      <WarningLevel>Level3</WarningLevel>
+      <WarningLevel>EnableAllWarnings</WarningLevel>
       <Optimization>Disabled</Optimization>
       <PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <AdditionalIncludeDirectories>$(SolutionDir)CryptoUtil;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
@@ -110,6 +153,25 @@
       <SubSystem>Windows</SubSystem>
     </Link>
   </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|x64'">
+    <ClCompile>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>$(SolutionDir)CryptoUtil;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <DebugInformationFormat>None</DebugInformationFormat>
+      <Optimization>MaxSpeed</Optimization>
+      <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <ClCompile>
       <WarningLevel>Level3</WarningLevel>
diff --git a/util/util.cpp b/util/util.cpp
index b48750b..c7bbdf2 100644
--- a/util/util.cpp
+++ b/util/util.cpp
@@ -8,7 +8,7 @@
 #include"util.h"
 
 #ifdef _WIN32
-#include<windows.h>
+#include <windows.h>
 #else
 #include<unistd.h>
 #include<sys/stat.h>
@@ -295,4 +295,4 @@ namespace util {
 
         return s.substr(left, right - left + 1);
     }
-}
\ No newline at end of file
+}
diff --git a/util/util.h b/util/util.h
index 83b81b2..5a0b4e3 100644
--- a/util/util.h
+++ b/util/util.h
@@ -1,5 +1,5 @@
-#ifndef _UTIL_H
-#define _UTIL_H
+#ifndef UTIL_H
+#define UTIL_H
 
 #include <string>
 #include <vector>
@@ -27,6 +27,8 @@ std::string formatSeconds(unsigned int seconds);
 uint32_t parseUInt32(std::string s);
 uint64_t parseUInt64(std::string s);
 bool isHex(const std::string &s);
+
+long getFileSize(const std::string& fileName);
 bool appendToFile(const std::string &fileName, const std::string &s);
 bool readLinesFromStream(std::istream &in, std::vector<std::string> &lines);
 bool readLinesFromStream(const std::string &fileName, std::vector<std::string> &lines);
@@ -43,4 +45,4 @@ std::string trim(const std::string &s, char c=' ');
 
 }
 
-#endif
\ No newline at end of file
+#endif
diff --git a/util/util.vcxproj b/util/util.vcxproj
index 687925a..5c7800e 100644
--- a/util/util.vcxproj
+++ b/util/util.vcxproj
@@ -5,6 +5,14 @@
       <Configuration>Debug</Configuration>
       <Platform>Win32</Platform>
     </ProjectConfiguration>
+    <ProjectConfiguration Include="Performance Release|Win32">
+      <Configuration>Performance Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Performance Release|x64">
+      <Configuration>Performance Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
     <ProjectConfiguration Include="Release|Win32">
       <Configuration>Release</Configuration>
       <Platform>Win32</Platform>
@@ -28,32 +36,46 @@
     <ProjectGuid>{93B89BF6-32B9-4EBA-AA44-BCFEC4836B76}</ProjectGuid>
     <Keyword>Win32Proj</Keyword>
     <RootNamespace>util</RootNamespace>
-    <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
+    <WindowsTargetPlatformVersion>10.0.19041.0</WindowsTargetPlatformVersion>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
+    <PlatformToolset>ClangCl</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>ClangCl</PlatformToolset>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
+    <PlatformToolset>ClangCl</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v142</PlatformToolset>
+    <PlatformToolset>ClangCl</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>ClangCl</PlatformToolset>
     <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PreferredToolArchitecture>x64</PreferredToolArchitecture>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v142</PlatformToolset>
+    <PlatformToolset>ClangCL</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
@@ -66,6 +88,10 @@
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
     <Import Project="..\BitCrack.props" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="..\BitCrack.props" />
+  </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
     <Import Project="..\BitCrack.props" />
@@ -74,6 +100,10 @@
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
     <Import Project="..\BitCrack.props" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="..\BitCrack.props" />
+  </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
     <Import Project="..\BitCrack.props" />
@@ -92,10 +122,22 @@
       <SubSystem>Windows</SubSystem>
     </Link>
   </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|Win32'">
+    <ClCompile> <!-- optimized, non-debug settings: this configuration sets UseDebugLibraries=false, so _DEBUG must not be defined -->
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>MaxSpeed</Optimization>
+      <PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <ClCompile>
       <PrecompiledHeader>NotUsing</PrecompiledHeader>
-      <WarningLevel>Level3</WarningLevel>
+      <WarningLevel>EnableAllWarnings</WarningLevel>
       <Optimization>Disabled</Optimization>
       <PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
     </ClCompile>
@@ -103,6 +145,24 @@
       <SubSystem>Windows</SubSystem>
     </Link>
   </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Performance Release|x64'">
+    <ClCompile>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <DebugInformationFormat>None</DebugInformationFormat>
+      <Optimization>MaxSpeed</Optimization>
+      <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <ClCompile>
       <WarningLevel>Level3</WarningLevel>