diff --git a/.gitignore b/.gitignore
index 3d8e148..53cb367 100644
--- a/.gitignore
+++ b/.gitignore
@@ -30,6 +30,7 @@ CLUnitTests/test.cl
[Dd]ebug/
[Dd]ebugPublic/
[Rr]elease/
+[Pp]erformance Release/
[Rr]eleases/
x64/
x86/
diff --git a/AddrGen/AddrGen.vcxproj b/AddrGen/AddrGen.vcxproj
index d7a25e6..bf50ce3 100644
--- a/AddrGen/AddrGen.vcxproj
+++ b/AddrGen/AddrGen.vcxproj
@@ -28,26 +28,26 @@
Application
true
- v141
+ ClangCl
Unicode
Application
false
- v141
+ ClangCl
true
Unicode
Application
true
- v142
+ ClangCl
Unicode
Application
false
- v142
+ ClangCL
true
Unicode
@@ -103,7 +103,7 @@
NotUsing
- Level3
+ EnableAllWarnings
Disabled
_CRT_SECURE_NO_WARNINGS;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
$(SolutionDir)secp256k1lib;$(SolutionDir)util;$(SolutionDir)AddressUtil;$(SolutionDir)CmdParse;%(AdditionalIncludeDirectories)
diff --git a/AddrGen/main.cpp b/AddrGen/main.cpp
index 7becf5d..f5a1089 100644
--- a/AddrGen/main.cpp
+++ b/AddrGen/main.cpp
@@ -1,5 +1,5 @@
#include
-#include
+#include
#include "secp256k1.h"
#include "util.h"
#include "AddressUtil.h"
diff --git a/AddressUtil/AddressUtil.h b/AddressUtil/AddressUtil.h
index 14aab13..0879501 100644
--- a/AddressUtil/AddressUtil.h
+++ b/AddressUtil/AddressUtil.h
@@ -1,35 +1,31 @@
-#ifndef _ADDRESS_UTIL_H
-#define _ADDRESS_UTIL_H
+#ifndef ADDRESS_UTIL_H
+#define ADDRESS_UTIL_H
#include "secp256k1.h"
namespace Address {
std::string fromPublicKey(const secp256k1::ecpoint &p, bool compressed = false);
bool verifyAddress(std::string address);
-};
+}
namespace Base58 {
- std::string toBase58(const secp256k1::uint256 &x);
+ std::string toBase58(const secp256k1::uint256 value);
secp256k1::uint256 toBigInt(const std::string &s);
void getMinMaxFromPrefix(const std::string &prefix, secp256k1::uint256 &minValueOut, secp256k1::uint256 &maxValueOut);
void toHash160(const std::string &s, unsigned int hash[5]);
- bool isBase58(std::string s);
-};
-
-
+ bool isBase58(const std::string &value);
+}
namespace Hash {
-
void hashPublicKey(const secp256k1::ecpoint &p, unsigned int *digest);
void hashPublicKeyCompressed(const secp256k1::ecpoint &p, unsigned int *digest);
void hashPublicKey(const unsigned int *x, const unsigned int *y, unsigned int *digest);
void hashPublicKeyCompressed(const unsigned int *x, const unsigned int *y, unsigned int *digest);
-};
-
+}
-#endif
\ No newline at end of file
+#endif
diff --git a/AddressUtil/AddressUtil.vcxproj b/AddressUtil/AddressUtil.vcxproj
index c2e6a66..df6aee8 100644
--- a/AddressUtil/AddressUtil.vcxproj
+++ b/AddressUtil/AddressUtil.vcxproj
@@ -5,6 +5,14 @@
Debug
Win32
+
+ Performance Release
+ Win32
+
+
+ Performance Release
+ x64
+
Release
Win32
@@ -37,32 +45,46 @@
{34042455-D274-432D-9134-C9EA41FD1B54}
Win32Proj
AddressUtil
- 10.0
+ 10.0.19041.0
StaticLibrary
true
- v141
+ ClangCl
+ Unicode
+
+
+ StaticLibrary
+ false
+ ClangCl
Unicode
StaticLibrary
false
- v141
+ ClangCl
true
Unicode
StaticLibrary
true
- v142
+ ClangCl
Unicode
+
+ StaticLibrary
+ false
+ ClangCl
+ Unicode
+ true
+ x64
+
StaticLibrary
false
- v142
+ ClangCL
true
Unicode
@@ -75,6 +97,10 @@
+
+
+
+
@@ -83,6 +109,10 @@
+
+
+
+
@@ -102,10 +132,23 @@
Windows
+
+
+
+
+ Level3
+ Disabled
+ _CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)
+ $(SolutionDir)secp256k1lib;$(SolutionDir)CryptoUtil;%(AdditionalIncludeDirectories)
+
+
+ Windows
+
+
NotUsing
- Level3
+ EnableAllWarnings
Disabled
_CRT_SECURE_NO_WARNINGS;_DEBUG;_LIB;%(PreprocessorDefinitions)
$(SolutionDir)secp256k1lib;$(SolutionDir)CryptoUtil;%(AdditionalIncludeDirectories)
@@ -114,6 +157,26 @@
Windows
+
+
+ NotUsing
+ Level3
+ Disabled
+ _CRT_SECURE_NO_WARNINGS;NDEBUG;_LIB;%(PreprocessorDefinitions)
+ $(SolutionDir)secp256k1lib;$(SolutionDir)CryptoUtil;%(AdditionalIncludeDirectories)
+ None
+ MaxSpeed
+ AnySuitable
+ true
+ Speed
+ true
+ true
+ true
+
+
+ Windows
+
+
Level3
diff --git a/AddressUtil/Base58.cpp b/AddressUtil/Base58.cpp
index 2d80ef3..5c7cdda 100644
--- a/AddressUtil/Base58.cpp
+++ b/AddressUtil/Base58.cpp
@@ -1,28 +1,146 @@
#include
- type ripemd160.cl secp256k1.cl sha256.cl keysearch.cl > bitcrack.cl
+ type $(SolutionDir)clMath\ripemd160.cl $(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl $(SolutionDir)CLKeySearchDevice\bitcoin.cl $(SolutionDir)CLKeySearchDevice\bloomfilter.cl $(SolutionDir)CLKeySearchDevice\keysearch.cl > bitcrack.cl
+$(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl
+ Embed bitcrack.cl into bitcrack_cl.cpp
+
+
+
+
+ Level3
+ Disabled
+ true
+ true
+ $(SolutionDir)\KeyFinderLib;$(SolutionDir)\clUtil;$(OPENCL_INCLUDE);$(SolutionDir)\secp256k1lib;$(SolutionDir)\Logger;$(SolutionDir)\util;%(AdditionalIncludeDirectories)
+ _CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)
+
+
+ type $(SolutionDir)clMath\ripemd160.cl $(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl $(SolutionDir)CLKeySearchDevice\bitcoin.cl $(SolutionDir)CLKeySearchDevice\bloomfilter.cl $(SolutionDir)CLKeySearchDevice\keysearch.cl > bitcrack.cl
$(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl
Embed bitcrack.cl into bitcrack_cl.cpp
@@ -128,7 +173,33 @@ $(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl
- type ripemd160.cl secp256k1.cl sha256.cl keysearch.cl > bitcrack.cl
+ type $(SolutionDir)clMath\ripemd160.cl $(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl $(SolutionDir)CLKeySearchDevice\bitcoin.cl $(SolutionDir)CLKeySearchDevice\bloomfilter.cl $(SolutionDir)CLKeySearchDevice\keysearch.cl > bitcrack.cl
+$(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl
+ Embed bitcrack.cl into bitcrack_cl.cpp
+
+
+
+
+ Level3
+ true
+ true
+ $(SolutionDir)\KeyFinderLib;$(SolutionDir)\clUtil;$(OPENCL_INCLUDE);$(SolutionDir)\secp256k1lib;$(SolutionDir)\Logger;$(SolutionDir)\util;%(AdditionalIncludeDirectories)
+ _CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)
+ None
+ MaxSpeed
+ AnySuitable
+ true
+ Speed
+ true
+ true
+ true
+
+
+
+
+
+
+ type $(SolutionDir)clMath\ripemd160.cl $(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl $(SolutionDir)CLKeySearchDevice\bitcoin.cl $(SolutionDir)CLKeySearchDevice\bloomfilter.cl $(SolutionDir)CLKeySearchDevice\keysearch.cl > bitcrack.cl
$(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl
Embed bitcrack.cl into bitcrack_cl.cpp
@@ -149,7 +220,7 @@ $(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cltrue
- type ripemd160.cl secp256k1.cl sha256.cl keysearch.cl > bitcrack.cl
+ type $(SolutionDir)clMath\ripemd160.cl $(SolutionDir)clMath\secp256k1.cl $(SolutionDir)clMath\sha256.cl $(SolutionDir)CLKeySearchDevice\bitcoin.cl $(SolutionDir)CLKeySearchDevice\bloomfilter.cl $(SolutionDir)CLKeySearchDevice\keysearch.cl > bitcrack.cl
$(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl
Embed bitcrack.cl into bitcrack_cl.cpp
@@ -162,6 +233,7 @@ $(SolutionDir)\tools\embedcl.exe bitcrack.cl bitcrack_cl.cpp _bitcrack_cl
+
diff --git a/CLKeySearchDevice/Makefile b/CLKeySearchDevice/Makefile
index 2da1746..41b85dc 100644
--- a/CLKeySearchDevice/Makefile
+++ b/CLKeySearchDevice/Makefile
@@ -2,7 +2,7 @@ NAME=CLKeySearchDevice
CPPSRC:=$(wildcard *.cpp)
all:
- cat ../clMath/sha256.cl ../clMath/secp256k1.cl ../clMath/ripemd160.cl keysearch.cl > bitcrack.cl
+ cat ../clMath/sha256.cl ../clMath/secp256k1.cl ../clMath/ripemd160.cl bloomfilter.cl bitcoin.cl keysearch.cl > bitcrack.cl
${BINDIR}/embedcl bitcrack.cl bitcrack_cl.cpp _bitcrack_cl
for file in ${CPPSRC} bitcrack_cl.cpp; do\
diff --git a/CLKeySearchDevice/bitcoin.cl b/CLKeySearchDevice/bitcoin.cl
new file mode 100644
index 0000000..3da6dbb
--- /dev/null
+++ b/CLKeySearchDevice/bitcoin.cl
@@ -0,0 +1,72 @@
+#ifndef BITCOIN_CL
+#define BITCOIN_CL
+
+/*
+ * endian(x): reverses the byte order of a 32-bit unsigned word.
+ *
+ * The whole expansion is parenthesised so the macro stays a single operand
+ * inside larger expressions such as endian(a) + b or endian(a) == b.
+ * The argument is expanded four times, so it must not have side effects.
+ */
+#ifndef endian
+#define endian(x) (((x) << 24) | (((x) << 8) & 0x00ff0000) | (((x) >> 8) & 0x0000ff00) | ((x) >> 24))
+#endif
+
+/*
+ * Runs sha256PublicKeyCompressed on the key, byte-swaps the eight resulting
+ * words and feeds them to ripemd160sha256NoFinal.
+ *
+ * x       - 256-bit value; its word array x.v is passed to the hash step.
+ * yParity - forwarded unchanged to sha256PublicKeyCompressed
+ *           (presumably the parity of the Y coordinate - TODO confirm).
+ * digest  - receives the five words written by ripemd160sha256NoFinal.
+ *           NOTE(review): the "NoFinal" name suggests the closing RIPEMD-160
+ *           step is left to the caller - confirm against ripemd160.cl.
+ */
+void hashPublicKeyCompressed(const uint256_t x, const unsigned int yParity, unsigned int digest[5])
+{
+ __private unsigned int hash[8];
+
+ sha256PublicKeyCompressed(x.v, yParity, hash);
+
+ // Swap to little-endian
+ hash[0] = endian(hash[0]);
+ hash[1] = endian(hash[1]);
+ hash[2] = endian(hash[2]);
+ hash[3] = endian(hash[3]);
+ hash[4] = endian(hash[4]);
+ hash[5] = endian(hash[5]);
+ hash[6] = endian(hash[6]);
+ hash[7] = endian(hash[7]);
+
+ ripemd160sha256NoFinal(hash, digest);
+}
+
+/*
+ * Runs sha256PublicKey on the key, byte-swaps the eight resulting words and
+ * feeds them to ripemd160sha256NoFinal.
+ *
+ * x, y   - 256-bit values; their word arrays x.v and y.v are passed to
+ *          sha256PublicKey.
+ * digest - receives the five words written by ripemd160sha256NoFinal.
+ */
+void hashPublicKey(const uint256_t x, const uint256_t y, unsigned int digest[5])
+{
+ __private unsigned int hash[8];
+
+ sha256PublicKey(x.v, y.v, hash);
+
+ // Swap to little-endian
+ hash[0] = endian(hash[0]);
+ hash[1] = endian(hash[1]);
+ hash[2] = endian(hash[2]);
+ hash[3] = endian(hash[3]);
+ hash[4] = endian(hash[4]);
+ hash[5] = endian(hash[5]);
+ hash[6] = endian(hash[6]);
+ hash[7] = endian(hash[7]);
+
+ ripemd160sha256NoFinal(hash, digest);
+}
+
+#endif
diff --git a/CLKeySearchDevice/bitcrack.cl b/CLKeySearchDevice/bitcrack.cl
index 974faef..32ac980 100644
--- a/CLKeySearchDevice/bitcrack.cl
+++ b/CLKeySearchDevice/bitcrack.cl
@@ -1,24 +1,17 @@
-#ifndef _RIPEMD160_CL
-#define _RIPEMD160_CL
+#ifndef RIPEMD160_CL
+#define RIPEMD160_CL
+#ifndef endian
+#define endian(x) (((x) << 24) | (((x) << 8) & 0x00ff0000) | (((x) >> 8) & 0x0000ff00) | ((x) >> 24))
+#endif
-__constant unsigned int _RIPEMD160_IV[5] = {
- 0x67452301,
- 0xefcdab89,
- 0x98badcfe,
- 0x10325476,
- 0xc3d2e1f0
+__constant unsigned int RIPEMD160_IV[5] = {
+ 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0,
};
-__constant unsigned int _K0 = 0x5a827999;
-__constant unsigned int _K1 = 0x6ed9eba1;
-__constant unsigned int _K2 = 0x8f1bbcdc;
-__constant unsigned int _K3 = 0xa953fd4e;
-
-__constant unsigned int _K4 = 0x7a6d76e9;
-__constant unsigned int _K5 = 0x6d703ef3;
-__constant unsigned int _K6 = 0x5c4dd124;
-__constant unsigned int _K7 = 0x50a28be6;
+__constant unsigned int K[8] = {
+ 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xa953fd4e, 0x7a6d76e9, 0x6d703ef3, 0x5c4dd124, 0x50a28be6
+};
#define rotl(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
@@ -38,22 +31,22 @@ __constant unsigned int _K7 = 0x50a28be6;
c = rotl((c), 10)
#define GG(a, b, c, d, e, x, s)\
- a += G((b), (c), (d)) + (x) + _K0;\
+ a += G((b), (c), (d)) + (x) + K[0];\
a = rotl((a), (s)) + (e);\
c = rotl((c), 10)
#define HH(a, b, c, d, e, x, s)\
- a += H((b), (c), (d)) + (x) + _K1;\
+ a += H((b), (c), (d)) + (x) + K[1];\
a = rotl((a), (s)) + (e);\
c = rotl((c), 10)
#define II(a, b, c, d, e, x, s)\
- a += I((b), (c), (d)) + (x) + _K2;\
+ a += I((b), (c), (d)) + (x) + K[2];\
a = rotl((a), (s)) + e;\
c = rotl((c), 10)
#define JJ(a, b, c, d, e, x, s)\
- a += J((b), (c), (d)) + (x) + _K3;\
+ a += J((b), (c), (d)) + (x) + K[3];\
a = rotl((a), (s)) + (e);\
c = rotl((c), 10)
@@ -63,523 +56,298 @@ __constant unsigned int _K7 = 0x50a28be6;
c = rotl((c), 10)
#define GGG(a, b, c, d, e, x, s)\
- a += G((b), (c), (d)) + x + _K4;\
+ a += G((b), (c), (d)) + x + K[4];\
a = rotl((a), (s)) + (e);\
c = rotl((c), 10)
#define HHH(a, b, c, d, e, x, s)\
- a += H((b), (c), (d)) + (x) + _K5;\
+ a += H((b), (c), (d)) + (x) + K[5];\
a = rotl((a), (s)) + (e);\
c = rotl((c), 10)
#define III(a, b, c, d, e, x, s)\
- a += I((b), (c), (d)) + (x) + _K6;\
+ a += I((b), (c), (d)) + (x) + K[6];\
a = rotl((a), (s)) + (e);\
c = rotl((c), 10)
#define JJJ(a, b, c, d, e, x, s)\
- a += J((b), (c), (d)) + (x) + _K7;\
+ a += J((b), (c), (d)) + (x) + K[7];\
a = rotl((a), (s)) + (e);\
c = rotl((c), 10)
-
-void ripemd160sha256(const unsigned int x[8], unsigned int digest[5])
+void ripemd160p1(const unsigned int x[8], unsigned int digest[5])
{
- unsigned int a1 = _RIPEMD160_IV[0];
- unsigned int b1 = _RIPEMD160_IV[1];
- unsigned int c1 = _RIPEMD160_IV[2];
- unsigned int d1 = _RIPEMD160_IV[3];
- unsigned int e1 = _RIPEMD160_IV[4];
-
- const unsigned int x8 = 0x00000080;
- const unsigned int x14 = 256;
+ __private unsigned int a = RIPEMD160_IV[0];
+ __private unsigned int b = RIPEMD160_IV[1];
+ __private unsigned int c = RIPEMD160_IV[2];
+ __private unsigned int d = RIPEMD160_IV[3];
+ __private unsigned int e = RIPEMD160_IV[4];
/* round 1 */
- FF(a1, b1, c1, d1, e1, x[0], 11);
- FF(e1, a1, b1, c1, d1, x[1], 14);
- FF(d1, e1, a1, b1, c1, x[2], 15);
- FF(c1, d1, e1, a1, b1, x[3], 12);
- FF(b1, c1, d1, e1, a1, x[4], 5);
- FF(a1, b1, c1, d1, e1, x[5], 8);
- FF(e1, a1, b1, c1, d1, x[6], 7);
- FF(d1, e1, a1, b1, c1, x[7], 9);
- FF(c1, d1, e1, a1, b1, x8, 11);
- FF(b1, c1, d1, e1, a1, 0, 13);
- FF(a1, b1, c1, d1, e1, 0, 14);
- FF(e1, a1, b1, c1, d1, 0, 15);
- FF(d1, e1, a1, b1, c1, 0, 6);
- FF(c1, d1, e1, a1, b1, 0, 7);
- FF(b1, c1, d1, e1, a1, x14, 9);
- FF(a1, b1, c1, d1, e1, 0, 8);
+ FF(a, b, c, d, e, x[0], 11);
+ FF(e, a, b, c, d, x[1], 14);
+ FF(d, e, a, b, c, x[2], 15);
+ FF(c, d, e, a, b, x[3], 12);
+ FF(b, c, d, e, a, x[4], 5);
+ FF(a, b, c, d, e, x[5], 8);
+ FF(e, a, b, c, d, x[6], 7);
+ FF(d, e, a, b, c, x[7], 9);
+ FF(c, d, e, a, b, 128, 11);
+ FF(b, c, d, e, a, 0, 13);
+ FF(a, b, c, d, e, 0, 14);
+ FF(e, a, b, c, d, 0, 15);
+ FF(d, e, a, b, c, 0, 6);
+ FF(c, d, e, a, b, 0, 7);
+ FF(b, c, d, e, a, 256, 9);
+ FF(a, b, c, d, e, 0, 8);
/* round 2 */
- GG(e1, a1, b1, c1, d1, x[7], 7);
- GG(d1, e1, a1, b1, c1, x[4], 6);
- GG(c1, d1, e1, a1, b1, 0, 8);
- GG(b1, c1, d1, e1, a1, x[1], 13);
- GG(a1, b1, c1, d1, e1, 0, 11);
- GG(e1, a1, b1, c1, d1, x[6], 9);
- GG(d1, e1, a1, b1, c1, 0, 7);
- GG(c1, d1, e1, a1, b1, x[3], 15);
- GG(b1, c1, d1, e1, a1, 0, 7);
- GG(a1, b1, c1, d1, e1, x[0], 12);
- GG(e1, a1, b1, c1, d1, 0, 15);
- GG(d1, e1, a1, b1, c1, x[5], 9);
- GG(c1, d1, e1, a1, b1, x[2], 11);
- GG(b1, c1, d1, e1, a1, x14, 7);
- GG(a1, b1, c1, d1, e1, 0, 13);
- GG(e1, a1, b1, c1, d1, x8, 12);
+ GG(e, a, b, c, d, x[7], 7);
+ GG(d, e, a, b, c, x[4], 6);
+ GG(c, d, e, a, b, 0, 8);
+ GG(b, c, d, e, a, x[1], 13);
+ GG(a, b, c, d, e, 0, 11);
+ GG(e, a, b, c, d, x[6], 9);
+ GG(d, e, a, b, c, 0, 7);
+ GG(c, d, e, a, b, x[3], 15);
+ GG(b, c, d, e, a, 0, 7);
+ GG(a, b, c, d, e, x[0], 12);
+ GG(e, a, b, c, d, 0, 15);
+ GG(d, e, a, b, c, x[5], 9);
+ GG(c, d, e, a, b, x[2], 11);
+ GG(b, c, d, e, a, 256, 7);
+ GG(a, b, c, d, e, 0, 13);
+ GG(e, a, b, c, d, 0x80, 12);
/* round 3 */
- HH(d1, e1, a1, b1, c1, x[3], 11);
- HH(c1, d1, e1, a1, b1, 0, 13);
- HH(b1, c1, d1, e1, a1, x14, 6);
- HH(a1, b1, c1, d1, e1, x[4], 7);
- HH(e1, a1, b1, c1, d1, 0, 14);
- HH(d1, e1, a1, b1, c1, 0, 9);
- HH(c1, d1, e1, a1, b1, x8, 13);
- HH(b1, c1, d1, e1, a1, x[1], 15);
- HH(a1, b1, c1, d1, e1, x[2], 14);
- HH(e1, a1, b1, c1, d1, x[7], 8);
- HH(d1, e1, a1, b1, c1, x[0], 13);
- HH(c1, d1, e1, a1, b1, x[6], 6);
- HH(b1, c1, d1, e1, a1, 0, 5);
- HH(a1, b1, c1, d1, e1, 0, 12);
- HH(e1, a1, b1, c1, d1, x[5], 7);
- HH(d1, e1, a1, b1, c1, 0, 5);
+ HH(d, e, a, b, c, x[3], 11);
+ HH(c, d, e, a, b, 0, 13);
+ HH(b, c, d, e, a, 256, 6);
+ HH(a, b, c, d, e, x[4], 7);
+ HH(e, a, b, c, d, 0, 14);
+ HH(d, e, a, b, c, 0, 9);
+ HH(c, d, e, a, b, 0x80, 13);
+ HH(b, c, d, e, a, x[1], 15);
+ HH(a, b, c, d, e, x[2], 14);
+ HH(e, a, b, c, d, x[7], 8);
+ HH(d, e, a, b, c, x[0], 13);
+ HH(c, d, e, a, b, x[6], 6);
+ HH(b, c, d, e, a, 0, 5);
+ HH(a, b, c, d, e, 0, 12);
+ HH(e, a, b, c, d, x[5], 7);
+ HH(d, e, a, b, c, 0, 5);
/* round 4 */
- II(c1, d1, e1, a1, b1, x[1], 11);
- II(b1, c1, d1, e1, a1, 0, 12);
- II(a1, b1, c1, d1, e1, 0, 14);
- II(e1, a1, b1, c1, d1, 0, 15);
- II(d1, e1, a1, b1, c1, x[0], 14);
- II(c1, d1, e1, a1, b1, x8, 15);
- II(b1, c1, d1, e1, a1, 0, 9);
- II(a1, b1, c1, d1, e1, x[4], 8);
- II(e1, a1, b1, c1, d1, 0, 9);
- II(d1, e1, a1, b1, c1, x[3], 14);
- II(c1, d1, e1, a1, b1, x[7], 5);
- II(b1, c1, d1, e1, a1, 0, 6);
- II(a1, b1, c1, d1, e1, x14, 8);
- II(e1, a1, b1, c1, d1, x[5], 6);
- II(d1, e1, a1, b1, c1, x[6], 5);
- II(c1, d1, e1, a1, b1, x[2], 12);
+ II(c, d, e, a, b, x[1], 11);
+ II(b, c, d, e, a, 0, 12);
+ II(a, b, c, d, e, 0, 14);
+ II(e, a, b, c, d, 0, 15);
+ II(d, e, a, b, c, x[0], 14);
+ II(c, d, e, a, b, 0x80, 15);
+ II(b, c, d, e, a, 0, 9);
+ II(a, b, c, d, e, x[4], 8);
+ II(e, a, b, c, d, 0, 9);
+ II(d, e, a, b, c, x[3], 14);
+ II(c, d, e, a, b, x[7], 5);
+ II(b, c, d, e, a, 0, 6);
+ II(a, b, c, d, e, 256, 8);
+ II(e, a, b, c, d, x[5], 6);
+ II(d, e, a, b, c, x[6], 5);
+ II(c, d, e, a, b, x[2], 12);
/* round 5 */
- JJ(b1, c1, d1, e1, a1, x[4], 9);
- JJ(a1, b1, c1, d1, e1, x[0], 15);
- JJ(e1, a1, b1, c1, d1, x[5], 5);
- JJ(d1, e1, a1, b1, c1, 0, 11);
- JJ(c1, d1, e1, a1, b1, x[7], 6);
- JJ(b1, c1, d1, e1, a1, 0, 8);
- JJ(a1, b1, c1, d1, e1, x[2], 13);
- JJ(e1, a1, b1, c1, d1, 0, 12);
- JJ(d1, e1, a1, b1, c1, x14, 5);
- JJ(c1, d1, e1, a1, b1, x[1], 12);
- JJ(b1, c1, d1, e1, a1, x[3], 13);
- JJ(a1, b1, c1, d1, e1, x8, 14);
- JJ(e1, a1, b1, c1, d1, 0, 11);
- JJ(d1, e1, a1, b1, c1, x[6], 8);
- JJ(c1, d1, e1, a1, b1, 0, 5);
- JJ(b1, c1, d1, e1, a1, 0, 6);
-
- unsigned int a2 = _RIPEMD160_IV[0];
- unsigned int b2 = _RIPEMD160_IV[1];
- unsigned int c2 = _RIPEMD160_IV[2];
- unsigned int d2 = _RIPEMD160_IV[3];
- unsigned int e2 = _RIPEMD160_IV[4];
+ JJ(b, c, d, e, a, x[4], 9);
+ JJ(a, b, c, d, e, x[0], 15);
+ JJ(e, a, b, c, d, x[5], 5);
+ JJ(d, e, a, b, c, 0, 11);
+ JJ(c, d, e, a, b, x[7], 6);
+ JJ(b, c, d, e, a, 0, 8);
+ JJ(a, b, c, d, e, x[2], 13);
+ JJ(e, a, b, c, d, 0, 12);
+ JJ(d, e, a, b, c, 256, 5);
+ JJ(c, d, e, a, b, x[1], 12);
+ JJ(b, c, d, e, a, x[3], 13);
+ JJ(a, b, c, d, e, 0x80, 14);
+ JJ(e, a, b, c, d, 0, 11);
+ JJ(d, e, a, b, c, x[6], 8);
+ JJ(c, d, e, a, b, 0, 5);
+ JJ(b, c, d, e, a, 0, 6);
+
+ digest[0] = c;
+ digest[1] = d;
+ digest[2] = e;
+ digest[3] = a;
+ digest[4] = b;
+}
+
+void ripemd160p2(const unsigned int x[8], unsigned int digest[5])
+{
+ __private unsigned int a = RIPEMD160_IV[0];
+ __private unsigned int b = RIPEMD160_IV[1];
+ __private unsigned int c = RIPEMD160_IV[2];
+ __private unsigned int d = RIPEMD160_IV[3];
+ __private unsigned int e = RIPEMD160_IV[4];
/* parallel round 1 */
- JJJ(a2, b2, c2, d2, e2, x[5], 8);
- JJJ(e2, a2, b2, c2, d2, x14, 9);
- JJJ(d2, e2, a2, b2, c2, x[7], 9);
- JJJ(c2, d2, e2, a2, b2, x[0], 11);
- JJJ(b2, c2, d2, e2, a2, 0, 13);
- JJJ(a2, b2, c2, d2, e2, x[2], 15);
- JJJ(e2, a2, b2, c2, d2, 0, 15);
- JJJ(d2, e2, a2, b2, c2, x[4], 5);
- JJJ(c2, d2, e2, a2, b2, 0, 7);
- JJJ(b2, c2, d2, e2, a2, x[6], 7);
- JJJ(a2, b2, c2, d2, e2, 0, 8);
- JJJ(e2, a2, b2, c2, d2, x8, 11);
- JJJ(d2, e2, a2, b2, c2, x[1], 14);
- JJJ(c2, d2, e2, a2, b2, 0, 14);
- JJJ(b2, c2, d2, e2, a2, x[3], 12);
- JJJ(a2, b2, c2, d2, e2, 0, 6);
+ JJJ(a, b, c, d, e, x[5], 8);
+ JJJ(e, a, b, c, d, 256, 9);
+ JJJ(d, e, a, b, c, x[7], 9);
+ JJJ(c, d, e, a, b, x[0], 11);
+ JJJ(b, c, d, e, a, 0, 13);
+ JJJ(a, b, c, d, e, x[2], 15);
+ JJJ(e, a, b, c, d, 0, 15);
+ JJJ(d, e, a, b, c, x[4], 5);
+ JJJ(c, d, e, a, b, 0, 7);
+ JJJ(b, c, d, e, a, x[6], 7);
+ JJJ(a, b, c, d, e, 0, 8);
+ JJJ(e, a, b, c, d, 0x80, 11);
+ JJJ(d, e, a, b, c, x[1], 14);
+ JJJ(c, d, e, a, b, 0, 14);
+ JJJ(b, c, d, e, a, x[3], 12);
+ JJJ(a, b, c, d, e, 0, 6);
/* parallel round 2 */
- III(e2, a2, b2, c2, d2, x[6], 9);
- III(d2, e2, a2, b2, c2, 0, 13);
- III(c2, d2, e2, a2, b2, x[3], 15);
- III(b2, c2, d2, e2, a2, x[7], 7);
- III(a2, b2, c2, d2, e2, x[0], 12);
- III(e2, a2, b2, c2, d2, 0, 8);
- III(d2, e2, a2, b2, c2, x[5], 9);
- III(c2, d2, e2, a2, b2, 0, 11);
- III(b2, c2, d2, e2, a2, x14, 7);
- III(a2, b2, c2, d2, e2, 0, 7);
- III(e2, a2, b2, c2, d2, x8, 12);
- III(d2, e2, a2, b2, c2, 0, 7);
- III(c2, d2, e2, a2, b2, x[4], 6);
- III(b2, c2, d2, e2, a2, 0, 15);
- III(a2, b2, c2, d2, e2, x[1], 13);
- III(e2, a2, b2, c2, d2, x[2], 11);
+ III(e, a, b, c, d, x[6], 9);
+ III(d, e, a, b, c, 0, 13);
+ III(c, d, e, a, b, x[3], 15);
+ III(b, c, d, e, a, x[7], 7);
+ III(a, b, c, d, e, x[0], 12);
+ III(e, a, b, c, d, 0, 8);
+ III(d, e, a, b, c, x[5], 9);
+ III(c, d, e, a, b, 0, 11);
+ III(b, c, d, e, a, 256, 7);
+ III(a, b, c, d, e, 0, 7);
+ III(e, a, b, c, d, 0x80, 12);
+ III(d, e, a, b, c, 0, 7);
+ III(c, d, e, a, b, x[4], 6);
+ III(b, c, d, e, a, 0, 15);
+ III(a, b, c, d, e, x[1], 13);
+ III(e, a, b, c, d, x[2], 11);
/* parallel round 3 */
- HHH(d2, e2, a2, b2, c2, 0, 9);
- HHH(c2, d2, e2, a2, b2, x[5], 7);
- HHH(b2, c2, d2, e2, a2, x[1], 15);
- HHH(a2, b2, c2, d2, e2, x[3], 11);
- HHH(e2, a2, b2, c2, d2, x[7], 8);
- HHH(d2, e2, a2, b2, c2, x14, 6);
- HHH(c2, d2, e2, a2, b2, x[6], 6);
- HHH(b2, c2, d2, e2, a2, 0, 14);
- HHH(a2, b2, c2, d2, e2, 0, 12);
- HHH(e2, a2, b2, c2, d2, x8, 13);
- HHH(d2, e2, a2, b2, c2, 0, 5);
- HHH(c2, d2, e2, a2, b2, x[2], 14);
- HHH(b2, c2, d2, e2, a2, 0, 13);
- HHH(a2, b2, c2, d2, e2, x[0], 13);
- HHH(e2, a2, b2, c2, d2, x[4], 7);
- HHH(d2, e2, a2, b2, c2, 0, 5);
+ HHH(d, e, a, b, c, 0, 9);
+ HHH(c, d, e, a, b, x[5], 7);
+ HHH(b, c, d, e, a, x[1], 15);
+ HHH(a, b, c, d, e, x[3], 11);
+ HHH(e, a, b, c, d, x[7], 8);
+ HHH(d, e, a, b, c, 256, 6);
+ HHH(c, d, e, a, b, x[6], 6);
+ HHH(b, c, d, e, a, 0, 14);
+ HHH(a, b, c, d, e, 0, 12);
+ HHH(e, a, b, c, d, 0x80, 13);
+ HHH(d, e, a, b, c, 0, 5);
+ HHH(c, d, e, a, b, x[2], 14);
+ HHH(b, c, d, e, a, 0, 13);
+ HHH(a, b, c, d, e, x[0], 13);
+ HHH(e, a, b, c, d, x[4], 7);
+ HHH(d, e, a, b, c, 0, 5);
/* parallel round 4 */
- GGG(c2, d2, e2, a2, b2, x8, 15);
- GGG(b2, c2, d2, e2, a2, x[6], 5);
- GGG(a2, b2, c2, d2, e2, x[4], 8);
- GGG(e2, a2, b2, c2, d2, x[1], 11);
- GGG(d2, e2, a2, b2, c2, x[3], 14);
- GGG(c2, d2, e2, a2, b2, 0, 14);
- GGG(b2, c2, d2, e2, a2, 0, 6);
- GGG(a2, b2, c2, d2, e2, x[0], 14);
- GGG(e2, a2, b2, c2, d2, x[5], 6);
- GGG(d2, e2, a2, b2, c2, 0, 9);
- GGG(c2, d2, e2, a2, b2, x[2], 12);
- GGG(b2, c2, d2, e2, a2, 0, 9);
- GGG(a2, b2, c2, d2, e2, 0, 12);
- GGG(e2, a2, b2, c2, d2, x[7], 5);
- GGG(d2, e2, a2, b2, c2, 0, 15);
- GGG(c2, d2, e2, a2, b2, x14, 8);
+ GGG(c, d, e, a, b, 0x80, 15);
+ GGG(b, c, d, e, a, x[6], 5);
+ GGG(a, b, c, d, e, x[4], 8);
+ GGG(e, a, b, c, d, x[1], 11);
+ GGG(d, e, a, b, c, x[3], 14);
+ GGG(c, d, e, a, b, 0, 14);
+ GGG(b, c, d, e, a, 0, 6);
+ GGG(a, b, c, d, e, x[0], 14);
+ GGG(e, a, b, c, d, x[5], 6);
+ GGG(d, e, a, b, c, 0, 9);
+ GGG(c, d, e, a, b, x[2], 12);
+ GGG(b, c, d, e, a, 0, 9);
+ GGG(a, b, c, d, e, 0, 12);
+ GGG(e, a, b, c, d, x[7], 5);
+ GGG(d, e, a, b, c, 0, 15);
+ GGG(c, d, e, a, b, 256, 8);
/* parallel round 5 */
- FFF(b2, c2, d2, e2, a2, 0, 8);
- FFF(a2, b2, c2, d2, e2, 0, 5);
- FFF(e2, a2, b2, c2, d2, 0, 12);
- FFF(d2, e2, a2, b2, c2, x[4], 9);
- FFF(c2, d2, e2, a2, b2, x[1], 12);
- FFF(b2, c2, d2, e2, a2, x[5], 5);
- FFF(a2, b2, c2, d2, e2, x8, 14);
- FFF(e2, a2, b2, c2, d2, x[7], 6);
- FFF(d2, e2, a2, b2, c2, x[6], 8);
- FFF(c2, d2, e2, a2, b2, x[2], 13);
- FFF(b2, c2, d2, e2, a2, 0, 6);
- FFF(a2, b2, c2, d2, e2, x14, 5);
- FFF(e2, a2, b2, c2, d2, x[0], 15);
- FFF(d2, e2, a2, b2, c2, x[3], 13);
- FFF(c2, d2, e2, a2, b2, 0, 11);
- FFF(b2, c2, d2, e2, a2, 0, 11);
-
- digest[0] = _RIPEMD160_IV[1] + c1 + d2;
- digest[1] = _RIPEMD160_IV[2] + d1 + e2;
- digest[2] = _RIPEMD160_IV[3] + e1 + a2;
- digest[3] = _RIPEMD160_IV[4] + a1 + b2;
- digest[4] = _RIPEMD160_IV[0] + b1 + c2;
+ FFF(b, c, d, e, a, 0, 8);
+ FFF(a, b, c, d, e, 0, 5);
+ FFF(e, a, b, c, d, 0, 12);
+ FFF(d, e, a, b, c, x[4], 9);
+ FFF(c, d, e, a, b, x[1], 12);
+ FFF(b, c, d, e, a, x[5], 5);
+ FFF(a, b, c, d, e, 0x80, 14);
+ FFF(e, a, b, c, d, x[7], 6);
+ FFF(d, e, a, b, c, x[6], 8);
+ FFF(c, d, e, a, b, x[2], 13);
+ FFF(b, c, d, e, a, 0, 6);
+ FFF(a, b, c, d, e, 256, 5);
+ FFF(e, a, b, c, d, x[0], 15);
+ FFF(d, e, a, b, c, x[3], 13);
+ FFF(c, d, e, a, b, 0, 11);
+ FFF(b, c, d, e, a, 0, 11);
+
+ digest[0] = d;
+ digest[1] = e;
+ digest[2] = a;
+ digest[3] = b;
+ digest[4] = c;
}
-
void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5])
{
- unsigned int a1 = _RIPEMD160_IV[0];
- unsigned int b1 = _RIPEMD160_IV[1];
- unsigned int c1 = _RIPEMD160_IV[2];
- unsigned int d1 = _RIPEMD160_IV[3];
- unsigned int e1 = _RIPEMD160_IV[4];
-
- const unsigned int x8 = 0x00000080;
- const unsigned int x14 = 256;
-
- /* round 1 */
- FF(a1, b1, c1, d1, e1, x[0], 11);
- FF(e1, a1, b1, c1, d1, x[1], 14);
- FF(d1, e1, a1, b1, c1, x[2], 15);
- FF(c1, d1, e1, a1, b1, x[3], 12);
- FF(b1, c1, d1, e1, a1, x[4], 5);
- FF(a1, b1, c1, d1, e1, x[5], 8);
- FF(e1, a1, b1, c1, d1, x[6], 7);
- FF(d1, e1, a1, b1, c1, x[7], 9);
- FF(c1, d1, e1, a1, b1, x8, 11);
- FF(b1, c1, d1, e1, a1, 0, 13);
- FF(a1, b1, c1, d1, e1, 0, 14);
- FF(e1, a1, b1, c1, d1, 0, 15);
- FF(d1, e1, a1, b1, c1, 0, 6);
- FF(c1, d1, e1, a1, b1, 0, 7);
- FF(b1, c1, d1, e1, a1, x14, 9);
- FF(a1, b1, c1, d1, e1, 0, 8);
-
- /* round 2 */
- GG(e1, a1, b1, c1, d1, x[7], 7);
- GG(d1, e1, a1, b1, c1, x[4], 6);
- GG(c1, d1, e1, a1, b1, 0, 8);
- GG(b1, c1, d1, e1, a1, x[1], 13);
- GG(a1, b1, c1, d1, e1, 0, 11);
- GG(e1, a1, b1, c1, d1, x[6], 9);
- GG(d1, e1, a1, b1, c1, 0, 7);
- GG(c1, d1, e1, a1, b1, x[3], 15);
- GG(b1, c1, d1, e1, a1, 0, 7);
- GG(a1, b1, c1, d1, e1, x[0], 12);
- GG(e1, a1, b1, c1, d1, 0, 15);
- GG(d1, e1, a1, b1, c1, x[5], 9);
- GG(c1, d1, e1, a1, b1, x[2], 11);
- GG(b1, c1, d1, e1, a1, x14, 7);
- GG(a1, b1, c1, d1, e1, 0, 13);
- GG(e1, a1, b1, c1, d1, x8, 12);
-
- /* round 3 */
- HH(d1, e1, a1, b1, c1, x[3], 11);
- HH(c1, d1, e1, a1, b1, 0, 13);
- HH(b1, c1, d1, e1, a1, x14, 6);
- HH(a1, b1, c1, d1, e1, x[4], 7);
- HH(e1, a1, b1, c1, d1, 0, 14);
- HH(d1, e1, a1, b1, c1, 0, 9);
- HH(c1, d1, e1, a1, b1, x8, 13);
- HH(b1, c1, d1, e1, a1, x[1], 15);
- HH(a1, b1, c1, d1, e1, x[2], 14);
- HH(e1, a1, b1, c1, d1, x[7], 8);
- HH(d1, e1, a1, b1, c1, x[0], 13);
- HH(c1, d1, e1, a1, b1, x[6], 6);
- HH(b1, c1, d1, e1, a1, 0, 5);
- HH(a1, b1, c1, d1, e1, 0, 12);
- HH(e1, a1, b1, c1, d1, x[5], 7);
- HH(d1, e1, a1, b1, c1, 0, 5);
+ __private unsigned int digest1[5];
+ __private unsigned int digest2[5];
- /* round 4 */
- II(c1, d1, e1, a1, b1, x[1], 11);
- II(b1, c1, d1, e1, a1, 0, 12);
- II(a1, b1, c1, d1, e1, 0, 14);
- II(e1, a1, b1, c1, d1, 0, 15);
- II(d1, e1, a1, b1, c1, x[0], 14);
- II(c1, d1, e1, a1, b1, x8, 15);
- II(b1, c1, d1, e1, a1, 0, 9);
- II(a1, b1, c1, d1, e1, x[4], 8);
- II(e1, a1, b1, c1, d1, 0, 9);
- II(d1, e1, a1, b1, c1, x[3], 14);
- II(c1, d1, e1, a1, b1, x[7], 5);
- II(b1, c1, d1, e1, a1, 0, 6);
- II(a1, b1, c1, d1, e1, x14, 8);
- II(e1, a1, b1, c1, d1, x[5], 6);
- II(d1, e1, a1, b1, c1, x[6], 5);
- II(c1, d1, e1, a1, b1, x[2], 12);
-
- /* round 5 */
- JJ(b1, c1, d1, e1, a1, x[4], 9);
- JJ(a1, b1, c1, d1, e1, x[0], 15);
- JJ(e1, a1, b1, c1, d1, x[5], 5);
- JJ(d1, e1, a1, b1, c1, 0, 11);
- JJ(c1, d1, e1, a1, b1, x[7], 6);
- JJ(b1, c1, d1, e1, a1, 0, 8);
- JJ(a1, b1, c1, d1, e1, x[2], 13);
- JJ(e1, a1, b1, c1, d1, 0, 12);
- JJ(d1, e1, a1, b1, c1, x14, 5);
- JJ(c1, d1, e1, a1, b1, x[1], 12);
- JJ(b1, c1, d1, e1, a1, x[3], 13);
- JJ(a1, b1, c1, d1, e1, x8, 14);
- JJ(e1, a1, b1, c1, d1, 0, 11);
- JJ(d1, e1, a1, b1, c1, x[6], 8);
- JJ(c1, d1, e1, a1, b1, 0, 5);
- JJ(b1, c1, d1, e1, a1, 0, 6);
-
- unsigned int a2 = _RIPEMD160_IV[0];
- unsigned int b2 = _RIPEMD160_IV[1];
- unsigned int c2 = _RIPEMD160_IV[2];
- unsigned int d2 = _RIPEMD160_IV[3];
- unsigned int e2 = _RIPEMD160_IV[4];
-
- /* parallel round 1 */
- JJJ(a2, b2, c2, d2, e2, x[5], 8);
- JJJ(e2, a2, b2, c2, d2, x14, 9);
- JJJ(d2, e2, a2, b2, c2, x[7], 9);
- JJJ(c2, d2, e2, a2, b2, x[0], 11);
- JJJ(b2, c2, d2, e2, a2, 0, 13);
- JJJ(a2, b2, c2, d2, e2, x[2], 15);
- JJJ(e2, a2, b2, c2, d2, 0, 15);
- JJJ(d2, e2, a2, b2, c2, x[4], 5);
- JJJ(c2, d2, e2, a2, b2, 0, 7);
- JJJ(b2, c2, d2, e2, a2, x[6], 7);
- JJJ(a2, b2, c2, d2, e2, 0, 8);
- JJJ(e2, a2, b2, c2, d2, x8, 11);
- JJJ(d2, e2, a2, b2, c2, x[1], 14);
- JJJ(c2, d2, e2, a2, b2, 0, 14);
- JJJ(b2, c2, d2, e2, a2, x[3], 12);
- JJJ(a2, b2, c2, d2, e2, 0, 6);
-
- /* parallel round 2 */
- III(e2, a2, b2, c2, d2, x[6], 9);
- III(d2, e2, a2, b2, c2, 0, 13);
- III(c2, d2, e2, a2, b2, x[3], 15);
- III(b2, c2, d2, e2, a2, x[7], 7);
- III(a2, b2, c2, d2, e2, x[0], 12);
- III(e2, a2, b2, c2, d2, 0, 8);
- III(d2, e2, a2, b2, c2, x[5], 9);
- III(c2, d2, e2, a2, b2, 0, 11);
- III(b2, c2, d2, e2, a2, x14, 7);
- III(a2, b2, c2, d2, e2, 0, 7);
- III(e2, a2, b2, c2, d2, x8, 12);
- III(d2, e2, a2, b2, c2, 0, 7);
- III(c2, d2, e2, a2, b2, x[4], 6);
- III(b2, c2, d2, e2, a2, 0, 15);
- III(a2, b2, c2, d2, e2, x[1], 13);
- III(e2, a2, b2, c2, d2, x[2], 11);
-
- /* parallel round 3 */
- HHH(d2, e2, a2, b2, c2, 0, 9);
- HHH(c2, d2, e2, a2, b2, x[5], 7);
- HHH(b2, c2, d2, e2, a2, x[1], 15);
- HHH(a2, b2, c2, d2, e2, x[3], 11);
- HHH(e2, a2, b2, c2, d2, x[7], 8);
- HHH(d2, e2, a2, b2, c2, x14, 6);
- HHH(c2, d2, e2, a2, b2, x[6], 6);
- HHH(b2, c2, d2, e2, a2, 0, 14);
- HHH(a2, b2, c2, d2, e2, 0, 12);
- HHH(e2, a2, b2, c2, d2, x8, 13);
- HHH(d2, e2, a2, b2, c2, 0, 5);
- HHH(c2, d2, e2, a2, b2, x[2], 14);
- HHH(b2, c2, d2, e2, a2, 0, 13);
- HHH(a2, b2, c2, d2, e2, x[0], 13);
- HHH(e2, a2, b2, c2, d2, x[4], 7);
- HHH(d2, e2, a2, b2, c2, 0, 5);
-
- /* parallel round 4 */
- GGG(c2, d2, e2, a2, b2, x8, 15);
- GGG(b2, c2, d2, e2, a2, x[6], 5);
- GGG(a2, b2, c2, d2, e2, x[4], 8);
- GGG(e2, a2, b2, c2, d2, x[1], 11);
- GGG(d2, e2, a2, b2, c2, x[3], 14);
- GGG(c2, d2, e2, a2, b2, 0, 14);
- GGG(b2, c2, d2, e2, a2, 0, 6);
- GGG(a2, b2, c2, d2, e2, x[0], 14);
- GGG(e2, a2, b2, c2, d2, x[5], 6);
- GGG(d2, e2, a2, b2, c2, 0, 9);
- GGG(c2, d2, e2, a2, b2, x[2], 12);
- GGG(b2, c2, d2, e2, a2, 0, 9);
- GGG(a2, b2, c2, d2, e2, 0, 12);
- GGG(e2, a2, b2, c2, d2, x[7], 5);
- GGG(d2, e2, a2, b2, c2, 0, 15);
- GGG(c2, d2, e2, a2, b2, x14, 8);
+ ripemd160p1(x, digest1);
+ ripemd160p2(x, digest2);
- /* parallel round 5 */
- FFF(b2, c2, d2, e2, a2, 0, 8);
- FFF(a2, b2, c2, d2, e2, 0, 5);
- FFF(e2, a2, b2, c2, d2, 0, 12);
- FFF(d2, e2, a2, b2, c2, x[4], 9);
- FFF(c2, d2, e2, a2, b2, x[1], 12);
- FFF(b2, c2, d2, e2, a2, x[5], 5);
- FFF(a2, b2, c2, d2, e2, x8, 14);
- FFF(e2, a2, b2, c2, d2, x[7], 6);
- FFF(d2, e2, a2, b2, c2, x[6], 8);
- FFF(c2, d2, e2, a2, b2, x[2], 13);
- FFF(b2, c2, d2, e2, a2, 0, 6);
- FFF(a2, b2, c2, d2, e2, x14, 5);
- FFF(e2, a2, b2, c2, d2, x[0], 15);
- FFF(d2, e2, a2, b2, c2, x[3], 13);
- FFF(c2, d2, e2, a2, b2, 0, 11);
- FFF(b2, c2, d2, e2, a2, 0, 11);
-
- digest[0] = c1 + d2;
- digest[1] = d1 + e2;
- digest[2] = e1 + a2;
- digest[3] = a1 + b2;
- digest[4] = b1 + c2;
+ digest[0] = digest1[0] + digest2[0];
+ digest[1] = digest1[1] + digest2[1];
+ digest[2] = digest1[2] + digest2[2];
+ digest[3] = digest1[3] + digest2[3];
+ digest[4] = digest1[4] + digest2[4];
}
-#endif
-#ifndef _SECP256K1_CL
-#define _SECP256K1_CL
-
-typedef ulong uint64_t;
-
-typedef struct {
- uint v[8];
-}uint256_t;
+void ripemd160FinalRound(const unsigned int hIn[5], unsigned int hOut[5]) // hOut[i] = endian(hIn[i] + RIPEMD160_IV[(i + 1) % 5])
+{ /* Each input word is added to an IV word whose index is rotated by one, then byte-swapped via endian(). */
+ hOut[0] = endian(hIn[0] + RIPEMD160_IV[1]);
+ hOut[1] = endian(hIn[1] + RIPEMD160_IV[2]);
+ hOut[2] = endian(hIn[2] + RIPEMD160_IV[3]);
+ hOut[3] = endian(hIn[3] + RIPEMD160_IV[4]);
+ hOut[4] = endian(hIn[4] + RIPEMD160_IV[0]); // index wraps back to IV word 0
+}
-/**
- Prime modulus 2^256 - 2^32 - 977
- */
-__constant unsigned int _P[8] = {
- 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFC2F
-};
+#endif
+#ifndef SECP256K1_CL
+#define SECP256K1_CL
-__constant unsigned int _P_MINUS1[8] = {
- 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFC2F
-};
+typedef struct uint256_t {
+ unsigned int v[8];
+} uint256_t;
/**
- Base point X
+ * Base point X
*/
-__constant unsigned int _GX[8] = {
+__constant unsigned int GX[8] = {
0x79BE667E, 0xF9DCBBAC, 0x55A06295, 0xCE870B07, 0x029BFCDB, 0x2DCE28D9, 0x59F2815B, 0x16F81798
};
/**
- Base point Y
+ * Base point Y
*/
-__constant unsigned int _GY[8] = {
+__constant unsigned int GY[8] = {
0x483ADA77, 0x26A3C465, 0x5DA4FBFC, 0x0E1108A8, 0xFD17B448, 0xA6855419, 0x9C47D08F, 0xFB10D4B8
};
-
/**
* Group order
*/
-__constant unsigned int _N[8] = {
+__constant unsigned int N[8] = {
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xBAAEDCE6, 0xAF48A03B, 0xBFD25E8C, 0xD0364141
};
-__constant unsigned int _INFINITY[8] = {
- 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
+/**
+ * Prime modulus 2^256 - 2^32 - 977
+ */
+__constant unsigned int P[8] = {
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFC2F
};
-void printBigInt(const unsigned int x[8])
-{
- printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
- x[0], x[1], x[2], x[3],
- x[4], x[5], x[6], x[7]);
-}
-
-// Add with carry
-unsigned int addc(unsigned int a, unsigned int b, unsigned int *carry)
-{
- unsigned int sum = a + *carry;
-
- unsigned int c1 = (sum < a) ? 1 : 0;
-
- sum = sum + b;
-
- unsigned int c2 = (sum < b) ? 1 : 0;
-
- *carry = c1 | c2;
-
- return sum;
-}
-
-// Subtract with borrow
-unsigned int subc(unsigned int a, unsigned int b, unsigned int *borrow)
-{
- unsigned int diff = a - *borrow;
-
- *borrow = (diff > a) ? 1 : 0;
-
- unsigned int diff2 = diff - b;
-
- *borrow |= (diff2 > diff) ? 1 : 0;
-
- return diff2;
-}
-
#ifdef DEVICE_VENDOR_INTEL
-
// Intel devices have a mul_hi bug
-unsigned int mul_hi977(unsigned int x)
+inline unsigned int mul_hi977(unsigned int x)
{
unsigned int high = x >> 16;
unsigned int low = x & 0xffff;
@@ -588,663 +356,673 @@ unsigned int mul_hi977(unsigned int x)
}
// 32 x 32 multiply-add
-void madd977(unsigned int *high, unsigned int *low, unsigned int a, unsigned int c)
+inline void madd977(unsigned int *high, unsigned int *low, unsigned int *a, unsigned int *c)
{
- *low = a * 977;
- unsigned int tmp = *low + c;
+ *low = *a * 977;
+ unsigned int tmp = *low + *c;
unsigned int carry = tmp < *low ? 1 : 0;
*low = tmp;
- *high = mul_hi977(a) + carry;
+ *high = mul_hi977(*a) + carry;
}
-
#else
-// 32 x 32 multiply-add
-void madd977(unsigned int *high, unsigned int *low, unsigned int a, unsigned int c)
+inline void madd977(unsigned int *high, unsigned int *low, unsigned int *a, unsigned int *c)
{
- *low = a * 977;
- unsigned int tmp = *low + c;
+ *low = *a * 977;
+ unsigned int tmp = *low + *c;
unsigned int carry = tmp < *low ? 1 : 0;
*low = tmp;
- *high = mad_hi(a, (unsigned int)977, carry);
+ *high = mad_hi(*a, (unsigned int)977, carry);
}
#endif
-// 32 x 32 multiply-add
-void madd(unsigned int *high, unsigned int *low, unsigned int a, unsigned int b, unsigned int c)
-{
- *low = a * b;
- unsigned int tmp = *low + c;
- unsigned int carry = tmp < *low ? 1 : 0;
- *low = tmp;
- *high = mad_hi(a, b, carry);
-}
-
-void mull(unsigned int *high, unsigned int *low, unsigned int a, unsigned int b)
-{
- *low = a * b;
- *high = mul_hi(a, b);
-}
-
-
-uint256_t sub256k(uint256_t a, uint256_t b, unsigned int* borrow_ptr)
-{
- unsigned int borrow = 0;
- uint256_t c;
-
- for(int i = 7; i >= 0; i--) {
- c.v[i] = subc(a.v[i], b.v[i], &borrow);
- }
-
- *borrow_ptr = borrow;
-
- return c;
-}
-
-bool greaterThanEqualToP(const unsigned int a[8])
-{
- for(int i = 0; i < 8; i++) {
- if(a[i] > _P_MINUS1[i]) {
- return true;
- } else if(a[i] < _P_MINUS1[i]) {
- return false;
- }
- }
-
- return true;
-}
+// Add with carry: sum = a + b + carry (mod 2^32); carry is updated to the
+// carry-out (0 or 1). carry and tmp must be unsigned int lvalues.
+//
+// The result is accumulated in tmp and only stored to sum as the last step,
+// so sum may be the same lvalue as a or b (e.g. addc(x[i], y, x[i], ...)).
+// Storing to sum first would make the overflow test compare a word with
+// itself (dropping the carry) or overwrite b before it has been added.
+#define addc(a, b, sum, carry, tmp) \
+    tmp = (a) + (carry); \
+    carry = ((tmp) < (carry)) * 1; \
+    tmp = (tmp) + (b); \
+    carry |= ((tmp) < (b)) * 1; \
+    sum = (tmp);
+
+// Subtract with borrow: diff = a - b - borrow (mod 2^32); borrow is both the
+// borrow-in and, on exit, the borrow-out (0 or 1).
+// a and b are fully read before diff is written, so diff may be the same
+// lvalue as a or b. tmp is a caller-supplied scratch variable.
+#define subc(a, b, diff, borrow, tmp) \
+ tmp = (a) - (borrow); \
+ borrow = ((tmp) > (a)) * 1; \
+ diff = (tmp) - (b); \
+ borrow |= ((diff) > (tmp)) ? 1 : 0;
+
+// 256-bit addition c = a + b over eight-word arrays, fully unrolled.
+// Words are processed from index 7 down to index 0, chaining the carry through
+// addc. The caller supplies carry as the carry-in (normally 0) and reads the
+// final carry-out from it afterwards; tmp is scratch storage for addc.
+#define add256k(a, b, c, carry, tmp) \
+ addc(a[7], b[7], c[7], carry, tmp); \
+ addc(a[6], b[6], c[6], carry, tmp); \
+ addc(a[5], b[5], c[5], carry, tmp); \
+ addc(a[4], b[4], c[4], carry, tmp); \
+ addc(a[3], b[3], c[3], carry, tmp); \
+ addc(a[2], b[2], c[2], carry, tmp); \
+ addc(a[1], b[1], c[1], carry, tmp); \
+ addc(a[0], b[0], c[0], carry, tmp);
+
+// 256-bit subtraction c = a - b over eight-word arrays, fully unrolled.
+// Words are processed from index 7 down to index 0, chaining the borrow through
+// subc. The caller supplies borrow as the borrow-in (normally 0) and reads the
+// final borrow-out from it afterwards; tmp is scratch storage for subc.
+#define sub256k( a, b, c, borrow, tmp) \
+ subc(a[7], b[7], c[7], borrow, tmp); \
+ subc(a[6], b[6], c[6], borrow, tmp); \
+ subc(a[5], b[5], c[5], borrow, tmp); \
+ subc(a[4], b[4], c[4], borrow, tmp); \
+ subc(a[3], b[3], c[3], borrow, tmp); \
+ subc(a[2], b[2], c[2], borrow, tmp); \
+ subc(a[1], b[1], c[1], borrow, tmp); \
+ subc(a[0], b[0], c[0], borrow, tmp);
+
+// Evaluates to 1 when every one of the eight words of a equals 0xffffffff,
+// and to 0 otherwise. AND-ing the words together leaves 0xffffffff only if no
+// word has a cleared bit, so one comparison covers all eight words.
+#define isInfinity256k(a) \
+    ( \
+        ((a)[0] & (a)[1] & (a)[2] & (a)[3] & \
+         (a)[4] & (a)[5] & (a)[6] & (a)[7]) == 0xffffffff \
+    )
+
+// Evaluates to 1 when the 256-bit value a (a[0] = most significant word) is
+// greater than or equal to the prime P, and to 0 otherwise.
+// Words 0..5 of P are all 0xffffffff, so a can only reach P when those six
+// words are 0xffffffff as well; the two low words are then compared in order.
+// Looking at a[6] / a[7] alone would classify small values such as
+// {0, ..., 0, 0xffffffff} as >= P and cause P to be subtracted wrongly.
+#define greaterOrEqualToP(a) \
+    ( \
+        ((a)[0] == 0xffffffff) && \
+        ((a)[1] == 0xffffffff) && \
+        ((a)[2] == 0xffffffff) && \
+        ((a)[3] == 0xffffffff) && \
+        ((a)[4] == 0xffffffff) && \
+        ((a)[5] == 0xffffffff) && \
+        (((a)[6] > P[6]) || (((a)[6] == P[6]) && ((a)[7] >= P[7]))) \
+    )
+
+// Evaluates to 1 when the eight-word values a and b are identical, 0 otherwise.
+// Each XOR is zero only for matching words; OR-ing the eight XOR results
+// yields zero only if every pair of words matches.
+#define equal256k(a, b) \
+    ( \
+        (((a)[0] ^ (b)[0]) | ((a)[1] ^ (b)[1]) | \
+         ((a)[2] ^ (b)[2]) | ((a)[3] ^ (b)[3]) | \
+         ((a)[4] ^ (b)[4]) | ((a)[5] ^ (b)[5]) | \
+         ((a)[6] ^ (b)[6]) | ((a)[7] ^ (b)[7])) == 0 \
+    )
void multiply256(const unsigned int x[8], const unsigned int y[8], unsigned int out_high[8], unsigned int out_low[8])
{
- unsigned int z[16];
-
- unsigned int high = 0;
+ __private unsigned long product;
// First round, overwrite z
- for(int j = 7; j >= 0; j--) {
-
- uint64_t product = (uint64_t)x[7] * y[j];
-
- product = product + high;
-
- z[7 + j + 1] = (unsigned int)product;
- high = (unsigned int)(product >> 32);
- }
- z[7] = high;
-
- for(int i = 6; i >= 0; i--) {
-
- high = 0;
-
- for(int j = 7; j >= 0; j--) {
-
- uint64_t product = (uint64_t)x[i] * y[j];
+ product = (unsigned long)x[7] * y[7];
+ out_low[7] = (unsigned int)product;
+
+ product = (unsigned long)x[7] * y[6] + (unsigned int)(product >> 32);
+ out_low[6] = (unsigned int)product;
+
+ product = (unsigned long)x[7] * y[5] + (unsigned int)(product >> 32);
+ out_low[5] = (unsigned int)product;
+
+ product = (unsigned long)x[7] * y[4] + (unsigned int)(product >> 32);
+ out_low[4] = (unsigned int)product;
+
+ product = (unsigned long)x[7] * y[3] + (unsigned int)(product >> 32);
+ out_low[3] = (unsigned int)product;
+
+ product = (unsigned long)x[7] * y[2] + (unsigned int)(product >> 32);
+ out_low[2] = (unsigned int)product;
+
+ product = (unsigned long)x[7] * y[1] + (unsigned int)(product >> 32);
+ out_low[1] = (unsigned int)product;
+
+ product = (unsigned long)x[7] * y[0] + (unsigned int)(product >> 32);
+ out_low[0] = (unsigned int)product;
+ out_high[7] = (unsigned int)(product >> 32);
+
+ product = (unsigned long)x[6] * y[7] + out_low[6];
+ out_low[6] = (unsigned int)product;
+
+ /** round6 */
+ product = (unsigned long)x[6] * y[6] + out_low[5] + (product >> 32);
+ out_low[5] = (unsigned int)product;
+
+ product = (unsigned long)x[6] * y[5] + out_low[4] + (product >> 32);
+ out_low[4] = (unsigned int)product;
+
+ product = (unsigned long)x[6] * y[4] + out_low[3] + (product >> 32);
+ out_low[3] = (unsigned int)product;
+
+ product = (unsigned long)x[6] * y[3] + out_low[2] + (product >> 32);
+ out_low[2] = (unsigned int)product;
+
+ product = (unsigned long)x[6] * y[2] + out_low[1] + (product >> 32);
+ out_low[1] = (unsigned int)product;
+
+ product = (unsigned long)x[6] * y[1] + out_low[0] + (product >> 32);
+ out_low[0] = (unsigned int)product;
+
+ product = (unsigned long)x[6] * y[0] + out_high[7] + (product >> 32);
+ out_high[7] = (unsigned int)product;
+ out_high[6] = product >> 32;
- product = product + z[i + j + 1] + high;
+ /** round 5 */
+ product = (unsigned long)x[5] * y[7] + out_low[5];
+ out_low[5] = (unsigned int)product;
- z[i + j + 1] = (unsigned int)product;
+ product = (unsigned long)x[5] * y[6] + out_low[4] + (product >> 32);
+ out_low[4] = (unsigned int)product;
- high = product >> 32;
- }
+ product = (unsigned long)x[5] * y[5] + out_low[3] + (product >> 32);
+ out_low[3] = (unsigned int)product;
- z[i] = high;
- }
+ product = (unsigned long)x[5] * y[4] + out_low[2] + (product >> 32);
+ out_low[2] = (unsigned int)product;
- for(int i = 0; i < 8; i++) {
- out_high[i] = z[i];
- out_low[i] = z[8 + i];
- }
-}
+ product = (unsigned long)x[5] * y[3] + out_low[1] + (product >> 32);
+ out_low[1] = (unsigned int)product;
+ product = (unsigned long)x[5] * y[2] + out_low[0] + (product >> 32);
+ out_low[0] = (unsigned int)product;
+
+ product = (unsigned long)x[5] * y[1] + out_high[7] + (product >> 32);
+ out_high[7] = (unsigned int)product;
+
+ product = (unsigned long)x[5] * y[0] + out_high[6] + (product >> 32);
+ out_high[6] = (unsigned int)product;
+ out_high[5] = product >> 32;
-unsigned int add256(const unsigned int a[8], const unsigned int b[8], unsigned int c[8])
-{
- unsigned int carry = 0;
-
- for(int i = 7; i >= 0; i--) {
- c[i] = addc(a[i], b[i], &carry);
- }
-
- return carry;
-}
-
-uint256_t add256k(uint256_t a, uint256_t b, unsigned int* carry_ptr)
-{
- uint256_t c;
- unsigned int carry = 0;
+ /** round 4 */
+ product = (unsigned long)x[4] * y[7] + out_low[4];
+ out_low[4] = (unsigned int)product;
- for(int i = 7; i >= 0; i--) {
- c.v[i] = addc(a.v[i], b.v[i], &carry);
- }
+ product = (unsigned long)x[4] * y[6] + out_low[3] + (product >> 32);
+ out_low[3] = (unsigned int)product;
- *carry_ptr = carry;
+ product = (unsigned long)x[4] * y[5] + out_low[2] + (product >> 32);
+ out_low[2] = (unsigned int)product;
- return c;
-}
+ product = (unsigned long)x[4] * y[4] + out_low[1] + (product >> 32);
+ out_low[1] = (unsigned int)product;
-bool isInfinity(const unsigned int x[8])
-{
- bool isf = true;
+ product = (unsigned long)x[4] * y[3] + out_low[0] + (product >> 32);
+ out_low[0] = (unsigned int)product;
- for(int i = 0; i < 8; i++) {
- if(x[i] != 0xffffffff) {
- isf = false;
- }
- }
-
- return isf;
-}
-
-bool isInfinity256k(const uint256_t x)
-{
- bool isf = true;
+ product = (unsigned long)x[4] * y[2] + out_high[7] + (product >> 32);
+ out_high[7] = (unsigned int)product;
+
+ product = (unsigned long)x[4] * y[1] + out_high[6] + (product >> 32);
+ out_high[6] = (unsigned int)product;
+
+ product = (unsigned long)x[4] * y[0] + out_high[5] + (product >> 32);
+ out_high[5] = (unsigned int)product;
+ out_high[4] = product >> 32;
- for(int i = 0; i < 8; i++) {
- if(x.v[i] != 0xffffffff) {
- isf = false;
- }
- }
+ /** round 3 */
+ product = (unsigned long)x[3] * y[7] + out_low[3];
+ out_low[3] = (unsigned int)product;
- return isf;
-}
+ product = (unsigned long)x[3] * y[6] + out_low[2] + (product >> 32);
+ out_low[2] = (unsigned int)product;
-bool equal(const unsigned int a[8], const unsigned int b[8])
-{
- for(int i = 0; i < 8; i++) {
- if(a[i] != b[i]) {
- return false;
- }
- }
+ product = (unsigned long)x[3] * y[5] + out_low[1] + (product >> 32);
+ out_low[1] = (unsigned int)product;
- return true;
-}
+ product = (unsigned long)x[3] * y[4] + out_low[0] + (product >> 32);
+ out_low[0] = (unsigned int)product;
-bool equal256k(uint256_t a, uint256_t b)
-{
- for(int i = 0; i < 8; i++) {
- if(a.v[i] != b.v[i]) {
- return false;
- }
- }
+ product = (unsigned long)x[3] * y[3] + out_high[7] + (product >> 32);
+ out_high[7] = (unsigned int)product;
- return true;
-}
+ product = (unsigned long)x[3] * y[2] + out_high[6] + (product >> 32);
+ out_high[6] = (unsigned int)product;
+
+ product = (unsigned long)x[3] * y[1] + out_high[5] + (product >> 32);
+ out_high[5] = (unsigned int)product;
+
+ product = (unsigned long)x[3] * y[0] + out_high[4] + (product >> 32);
+ out_high[4] = (unsigned int)product;
+ out_high[3] = product >> 32;
-inline uint256_t readInt256(__global const uint256_t* ara, int idx)
-{
- return ara[idx];
-}
+ /** round 2 */
+ product = (unsigned long)x[2] * y[7] + out_low[2];
+ out_low[2] = (unsigned int)product;
-/*
- * Read least-significant word
- */
-unsigned int readLSW(__global const unsigned int *ara, int idx)
-{
- return ara[idx * 8 + 7];
-}
+ product = (unsigned long)x[2] * y[6] + out_low[1] + (product >> 32);
+ out_low[1] = (unsigned int)product;
-unsigned int readLSW256k(__global const uint256_t* ara, int idx)
-{
- return ara[idx].v[7];
-}
+ product = (unsigned long)x[2] * y[5] + out_low[0] + (product >> 32);
+ out_low[0] = (unsigned int)product;
-unsigned int readWord256k(__global const uint256_t* ara, int idx, int word)
-{
- return ara[idx].v[word];
-}
+ product = (unsigned long)x[2] * y[4] + out_high[7] + (product >> 32);
+ out_high[7] = (unsigned int)product;
-unsigned int addP(const unsigned int a[8], unsigned int c[8])
-{
- unsigned int carry = 0;
+ product = (unsigned long)x[2] * y[3] + out_high[6] + (product >> 32);
+ out_high[6] = (unsigned int)product;
- for(int i = 7; i >= 0; i--) {
- c[i] = addc(a[i], _P[i], &carry);
- }
+ product = (unsigned long)x[2] * y[2] + out_high[5] + (product >> 32);
+ out_high[5] = (unsigned int)product;
+
+ product = (unsigned long)x[2] * y[1] + out_high[4] + (product >> 32);
+ out_high[4] = (unsigned int)product;
+
+ product = (unsigned long)x[2] * y[0] + out_high[3] + (product >> 32);
+ out_high[3] = (unsigned int)product;
+ out_high[2] = product >> 32;
+
+ /** round 1 */
+ product = (unsigned long)x[1] * y[7] + out_low[1];
+ out_low[1] = (unsigned int)product;
- return carry;
-}
+ product = (unsigned long)x[1] * y[6] + out_low[0] + (product >> 32);
+ out_low[0] = (unsigned int)product;
-unsigned int subP(const unsigned int a[8], unsigned int c[8])
-{
- unsigned int borrow = 0;
- for(int i = 7; i >= 0; i--) {
- c[i] = subc(a[i], _P[i], &borrow);
- }
+ product = (unsigned long)x[1] * y[5] + out_high[7] + (product >> 32);
+ out_high[7] = (unsigned int)product;
- return borrow;
-}
+ product = (unsigned long)x[1] * y[4] + out_high[6] + (product >> 32);
+ out_high[6] = (unsigned int)product;
-/**
- * Subtraction mod p
- */
-uint256_t subModP256k(uint256_t a, uint256_t b)
-{
- unsigned int borrow = 0;
- uint256_t c = sub256k(a, b, &borrow);
- if(borrow) {
- addP(c.v, c.v);
- }
+ product = (unsigned long)x[1] * y[3] + out_high[5] + (product >> 32);
+ out_high[5] = (unsigned int)product;
- return c;
-}
+ product = (unsigned long)x[1] * y[2] + out_high[4] + (product >> 32);
+ out_high[4] = (unsigned int)product;
+
+ product = (unsigned long)x[1] * y[1] + out_high[3] + (product >> 32);
+ out_high[3] = (unsigned int)product;
+
+ product = (unsigned long)x[1] * y[0] + out_high[2] + (product >> 32);
+ out_high[2] = (unsigned int)product;
+ out_high[1] = product >> 32;
+ /** round 0 */
+ product = (unsigned long)x[0] * y[7] + out_low[0];
+ out_low[0] = (unsigned int)product;
-uint256_t addModP256k(uint256_t a, uint256_t b)
-{
- unsigned int carry = 0;
+ product = (unsigned long)x[0] * y[6] + out_high[7] + (product >> 32);
+ out_high[7] = (unsigned int)product;
- uint256_t c = add256k(a, b, &carry);
+ product = (unsigned long)x[0] * y[5] + out_high[6] + (product >> 32);
+ out_high[6] = (unsigned int)product;
- bool gt = false;
- for(int i = 0; i < 8; i++) {
- if(c.v[i] > _P[i]) {
- gt = true;
- break;
- } else if(c.v[i] < _P[i]) {
- break;
- }
- }
+ product = (unsigned long)x[0] * y[4] + out_high[5] + (product >> 32);
+ out_high[5] = (unsigned int)product;
- if(carry || gt) {
- subP(c.v, c.v);
- }
+ product = (unsigned long)x[0] * y[3] + out_high[4] + (product >> 32);
+ out_high[4] = (unsigned int)product;
- return c;
+ product = (unsigned long)x[0] * y[2] + out_high[3] + (product >> 32);
+ out_high[3] = (unsigned int)product;
+
+ product = (unsigned long)x[0] * y[1] + out_high[2] + (product >> 32);
+ out_high[2] = (unsigned int)product;
+
+ product = (unsigned long)x[0] * y[0] + out_high[1] + (product >> 32);
+ out_high[1] = (unsigned int)product;
+ out_high[0] = product >> 32;
}
-
-void mulModP(const unsigned int a[8], const unsigned int b[8], unsigned int product_low[8])
+void mulModP(unsigned int a[8], unsigned int b[8], unsigned int product_low[8])
{
- unsigned int high[8];
+ __private unsigned int high[8];
+ __private unsigned int low[8];
- unsigned int hWord = 0;
- unsigned int carry = 0;
+ __private unsigned int hWord = 0;
+ __private unsigned int carry = 0;
+ __private unsigned int t = 0;
+ __private unsigned int product6 = 0;
+ __private unsigned int product7 = 0;
+ __private unsigned int tmp;
// 256 x 256 multiply
- multiply256(a, b, high, product_low);
+ multiply256(a, b, high, low);
+ product_low[7] = low[7];
+ product_low[6] = low[6];
+ product_low[5] = low[5];
+ product_low[4] = low[4];
+ product_low[3] = low[3];
+ product_low[2] = low[2];
+ product_low[1] = low[1];
+ product_low[0] = low[0];
// Add 2^32 * high to the low 256 bits (shift left 1 word and add)
// Affects product[14] to product[6]
- for(int i = 6; i >= 0; i--) {
- product_low[i] = addc(product_low[i], high[i + 1], &carry);
- }
- unsigned int product7 = addc(high[0], 0, &carry);
- unsigned int product6 = carry;
+ addc(product_low[6], high[7], product_low[6], carry, tmp);
+ addc(product_low[5], high[6], product_low[5], carry, tmp);
+ addc(product_low[4], high[5], product_low[4], carry, tmp);
+ addc(product_low[3], high[4], product_low[3], carry, tmp);
+ addc(product_low[2], high[3], product_low[2], carry, tmp);
+ addc(product_low[1], high[2], product_low[1], carry, tmp);
+ addc(product_low[0], high[1], product_low[0], carry, tmp);
+
+ addc(high[0], 0, product7, carry, tmp);
+ product6 = carry;
carry = 0;
// Multiply high by 977 and add to low
// Affects product[15] to product[5]
for(int i = 7; i >= 0; i--) {
- unsigned int t = 0;
- madd977(&hWord, &t, high[i], hWord);
- product_low[i] = addc(product_low[i], t, &carry);
+ madd977(&hWord, &t, &high[i], &hWord);
+ addc(product_low[i], t, product_low[i], carry, tmp);
+ t = 0;
}
- product7 = addc(product7, hWord, &carry);
- product6 = addc(product6, 0, &carry);
+ addc(product7, hWord, high[7], carry, tmp);
+ addc(product6, 0, high[6], carry, tmp);
// Multiply high 2 words by 2^32 and add to low
// Affects product[14] to product[7]
carry = 0;
- high[7] = product7;
- high[6] = product6;
- product7 = 0;
- product6 = 0;
+ addc(product_low[6], high[7], product_low[6], carry, tmp);
+ addc(product_low[5], high[6], product_low[5], carry, tmp);
- product_low[6] = addc(product_low[6], high[7], &carry);
- product_low[5] = addc(product_low[5], high[6], &carry);
-
- // Propagate the carry
- for(int i = 4; i >= 0; i--) {
- product_low[i] = addc(product_low[i], 0, &carry);
- }
- product7 = carry;
+ addc(product_low[4], 0, product_low[4], carry, tmp);
+ addc(product_low[3], 0, product_low[3], carry, tmp);
+ addc(product_low[2], 0, product_low[2], carry, tmp);
+ addc(product_low[1], 0, product_low[1], carry, tmp);
+ addc(product_low[0], 0, product_low[0], carry, tmp);
// Multiply top 2 words by 977 and add to low
// Affects product[15] to product[7]
carry = 0;
hWord = 0;
- unsigned int t = 0;
- madd977(&hWord, &t, high[7], hWord);
- product_low[7] = addc(product_low[7], t, &carry);
- madd977(&hWord, &t, high[6], hWord);
- product_low[6] = addc(product_low[6], t, &carry);
- product_low[5] = addc(product_low[5], hWord, &carry);
-
+ madd977(&hWord, &t, &high[7], &hWord);
+ addc(product_low[7], t, product_low[7], carry, tmp);
+ madd977(&hWord, &t, &high[6], &hWord);
+ addc(product_low[6], t, product_low[6], carry, tmp);
+ addc(product_low[5], hWord, product_low[5], carry, tmp);
// Propagate carry
- for(int i = 4; i >= 0; i--) {
- product_low[i] = addc(product_low[i], 0, &carry);
- }
- product7 = carry;
+ addc(product_low[4], 0, product_low[4], carry, tmp);
+ addc(product_low[3], 0, product_low[3], carry, tmp);
+ addc(product_low[2], 0, product_low[2], carry, tmp);
+ addc(product_low[1], 0, product_low[1], carry, tmp);
+ addc(product_low[0], 0, product_low[0], carry, tmp);
// Reduce if >= P
- if(product7 || greaterThanEqualToP(product_low)) {
- subP(product_low, product_low);
+ if(carry || greaterOrEqualToP(product_low)) {
+ carry = 0;
+ sub256k(product_low, P, product_low, carry, tmp);
}
}
-uint256_t mulModP256k(uint256_t a, uint256_t b)
-{
- uint256_t c;
-
- mulModP(a.v, b.v, c.v);
-
- return c;
-}
-
-
-uint256_t squareModP256k(uint256_t a)
+/**
+ * Subtraction mod p
+ */
+void subModP256k(unsigned int a[8], unsigned int b[8], unsigned int c[8])
{
- uint256_t b;
- mulModP(a.v, a.v, b.v);
-
- return b;
+ __private unsigned int borrow = 0;
+ __private unsigned int tmp;
+
+ sub256k(a, b, c, borrow, tmp);
+
+ if (borrow) {
+ borrow = 0;
+ add256k(c, P, c, borrow, tmp);
+ }
}
-
/**
* Multiplicative inverse mod P using Fermat's method of x^(p-2) mod p and addition chains
*/
-uint256_t invModP256k(uint256_t value)
+void invModP256k(unsigned int x[8])
{
- uint256_t x = value;
-
-
- //unsigned int y[8] = { 0, 0, 0, 0, 0, 0, 0, 1 };
- uint256_t y = {{0, 0, 0, 0, 0, 0, 0, 1}};
-
- // 0xd - 1101
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- //y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
-
- // 0x2 - 0010
- //y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- //y = mulModP256k(x, y);
- x = squareModP256k(x);
- //y = mulModP256k(x, y);
- x = squareModP256k(x);
-
- // 0xc = 0x1100
- //y = mulModP256k(x, y);
- x = squareModP256k(x);
- //y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
-
-
- // 0xfffff
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
-
-
- // 0xe - 1110
- //y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- // 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffff
- for(int i = 0; i < 219; i++) {
- y = mulModP256k(x, y);
- x = squareModP256k(x);
+ __private unsigned int y[8] = {0, 0, 0, 0, 0, 0, 0, 1};
+
+ mulModP(x, y, y);
+ mulModP(x, x, x);
+ mulModP(x, x, x);
+ mulModP(x, y, y);
+ mulModP(x, x, x);
+ mulModP(x, y, y);
+ mulModP(x, x, x);
+ mulModP(x, x, x);
+ mulModP(x, y, y);
+
+ for(int i = 0; i < 5; i++) {
+ mulModP(x, x, x);
}
- y = mulModP256k(x, y);
- return y;
+ for(int i = 0; i < 22; i++) {
+ mulModP(x, y, y);
+ mulModP(x, x, x);
+ }
+
+ mulModP(x, x, x);
+
+ for(int i = 0; i < 222; i++) {
+ mulModP(x, y, y);
+ mulModP(x, x, x);
+ }
+
+ mulModP(x, y, x);
}
+/**
+ * Addition mod P: stores a + b in c and subtracts P once when the 256-bit
+ * addition carries out of the top word or when the sum is strictly greater
+ * than P. A sum exactly equal to P is left as is.
+ */
+void addModP256k(const unsigned int a[8], const unsigned int b[8], unsigned int c[8])
+{
+    __private unsigned int borrow = 0;
+    __private unsigned int carry = 0;
+    __private unsigned int tmp = 0;
+
+    add256k(a, b, c, carry, tmp);
+
+    // A carry-out always forces the reduction. Otherwise scan from the most
+    // significant word down: the first word that differs from P decides.
+    __private bool reduce = (carry != 0);
+    for(int w = 0; w < 8 && !reduce; w++) {
+        if(c[w] != P[w]) {
+            reduce = (c[w] > P[w]);
+            break;
+        }
+    }
+
+    if(reduce) {
+        sub256k(c, P, c, borrow, tmp);
+    }
+}
-void beginBatchAdd256k(uint256_t px, uint256_t x, __global uint256_t* chain, int i, int batchIdx, uint256_t* inverse)
+void doBatchInverse256k(unsigned int x[8])
{
- int gid = get_local_size(0) * get_group_id(0) + get_local_id(0);
- int dim = get_global_size(0);
+ invModP256k(x);
+}
+
+void beginBatchAdd256k(
+ const uint256_t px,
+ const uint256_t x,
+ __global uint256_t* chain,
+ const int i,
+ const int batchIdx,
+ uint256_t* inverse
+) {
+ __private int gid = get_local_size(0) * get_group_id(0) + get_local_id(0);
+ __private int dim = get_global_size(0);
+
+ __private unsigned int t[8];
// x = Gx - x
- uint256_t t = subModP256k(px, x);
+ subModP256k(px.v, x.v, t);
// Keep a chain of multiples of the diff, i.e. c[0] = diff0, c[1] = diff0 * diff1,
// c[2] = diff2 * diff1 * diff0, etc
- *inverse = mulModP256k(*inverse, t);
+ mulModP(inverse->v, t, inverse->v);
chain[batchIdx * dim + gid] = *inverse;
}
-
-void beginBatchAddWithDouble256k(uint256_t px, uint256_t py, __global uint256_t* xPtr, __global uint256_t* chain, int i, int batchIdx, uint256_t* inverse)
-{
- int gid = get_local_size(0) * get_group_id(0) + get_local_id(0);
- int dim = get_global_size(0);
-
- uint256_t x = xPtr[i];
-
- if(equal256k(px, x)) {
- x = addModP256k(py, py);
+void beginBatchAddWithDouble256k(
+ const uint256_t px,
+ const uint256_t py,
+ __global uint256_t* xPtr,
+ __global uint256_t* chain,
+ const int i,
+ const int batchIdx,
+ uint256_t* inverse
+) {
+ __private int gid = get_local_size(0) * get_group_id(0) + get_local_id(0);
+ __private int dim = get_global_size(0);
+ __private uint256_t x = xPtr[i];
+
+ if(equal256k(px.v, x.v)) {
+ addModP256k(py.v,py.v, x.v);
} else {
// x = Gx - x
- x = subModP256k(px, x);
+ subModP256k(px.v, x.v, x.v);
}
// Keep a chain of multiples of the diff, i.e. c[0] = diff0, c[1] = diff0 * diff1,
// c[2] = diff2 * diff1 * diff0, etc
- *inverse = mulModP256k(x, *inverse);
+ mulModP(x.v, inverse->v, inverse->v);
chain[batchIdx * dim + gid] = *inverse;
}
+/**
+ * Completes one point addition of a batch, writing the sum of (px, py) and
+ * the point (xPtr[i], yPtr[i]) to newX / newY.
+ *
+ * For batchIdx != 0 the slope factor is *inverse multiplied by the chain entry
+ * of the previous batch index, and *inverse is then multiplied by (px - x) for
+ * the next call. For batchIdx == 0 *inverse is used directly and left unchanged.
+ */
+void completeBatchAdd256k(
+    const uint256_t px,
+    const uint256_t py,
+    __global uint256_t* xPtr,
+    __global uint256_t* yPtr,
+    const int i,
+    const int batchIdx,
+    __global uint256_t* chain,
+    uint256_t* inverse,
+    uint256_t* newX,
+    uint256_t* newY)
+{
+    __private int gid = get_local_size(0) * get_group_id(0) + get_local_id(0);
+    __private int dim = get_global_size(0);
+    __private uint256_t x = xPtr[i];
+    __private uint256_t y = yPtr[i];
+
+    uint256_t slope;
+    __private unsigned int scratch[8];
+
+    if(batchIdx == 0) {
+        // First entry of the chain: nothing to peel off
+        slope = *inverse;
+    } else {
+        uint256_t prev = chain[(batchIdx - 1) * dim + gid];
+
+        mulModP(inverse->v, prev.v, slope.v);
+
+        // Fold (px - x) into the running inverse
+        subModP256k(px.v, x.v, scratch);
+        mulModP(scratch, inverse->v, inverse->v);
+    }
+
+    // slope = (py - y) * slope
+    subModP256k(py.v, y.v, scratch);
+    mulModP(scratch, slope.v, slope.v);
+
+    // Rx = s^2 - Gx - Qx
+    mulModP(slope.v, slope.v, scratch);
+    subModP256k(scratch, px.v, newX->v);
+    subModP256k(newX->v, x.v, newX->v);
+
+    // Ry = s(px - rx) - py
+    subModP256k(px.v, newX->v, scratch);
+    mulModP(slope.v, scratch, newY->v);
+    subModP256k(newY->v, py.v, newY->v);
+}
void completeBatchAddWithDouble256k(
- uint256_t px,
- uint256_t py,
+ const uint256_t px,
+ const uint256_t py,
__global const uint256_t* xPtr,
__global const uint256_t* yPtr,
- int i,
- int batchIdx,
+ const int i,
+ const int batchIdx,
__global uint256_t* chain,
uint256_t* inverse,
uint256_t* newX,
uint256_t* newY)
{
- int gid = get_local_size(0) * get_group_id(0) + get_local_id(0);
- int dim = get_global_size(0);
- uint256_t s;
- uint256_t x;
- uint256_t y;
+ __private int gid = get_local_size(0) * get_group_id(0) + get_local_id(0);
+ __private int dim = get_global_size(0);
+ __private uint256_t s;
+ __private uint256_t x;
+ __private uint256_t y;
x = xPtr[i];
y = yPtr[i];
if(batchIdx >= 1) {
- uint256_t c;
+ __private uint256_t c;
c = chain[(batchIdx - 1) * dim + gid];
- s = mulModP256k(*inverse, c);
+ mulModP(inverse->v, c.v, s.v);
uint256_t diff;
- if(equal256k(px, x)) {
- diff = addModP256k(py, py);
+ if(equal256k(px.v, x.v)) {
+ addModP256k(py.v, py.v, diff.v);
} else {
- diff = subModP256k(px, x);
+ subModP256k(px.v, x.v, diff.v);
}
- *inverse = mulModP256k(diff, *inverse);
+ mulModP(diff.v, inverse->v, inverse->v);
} else {
s = *inverse;
}
- if(equal256k(px, x)) {
+ if(equal256k(px.v, x.v)) {
// currently s = 1 / 2y
- uint256_t x2;
- uint256_t tx2;
- uint256_t x3;
+ __private uint256_t x2;
+ __private uint256_t tx2;
// 3x^2
- x2 = mulModP256k(x, x);
- tx2 = addModP256k(x2, x2);
- tx2 = addModP256k(x2, tx2);
+ mulModP(x.v, x.v, x2.v);
+ addModP256k(x2.v, x2.v, tx2.v);
+ addModP256k(x2.v, tx2.v, tx2.v);
// s = 3x^2 * 1/2y
- s = mulModP256k(tx2, s);
+ mulModP(tx2.v, s.v, s.v);
// s^2
- uint256_t s2;
- s2 = mulModP256k(s, s);
+ __private uint256_t s2;
+ mulModP(s.v, s.v, s2.v);
// Rx = s^2 - 2px
- *newX = subModP256k(s2, x);
- *newX = subModP256k(*newX, x);
+ subModP256k(s2.v, x.v, newX->v);
+ subModP256k(newX->v, x.v, newX->v);
// Ry = s(px - rx) - py
- uint256_t k;
- k = subModP256k(px, *newX);
- *newY = mulModP256k(s, k);
- *newY = subModP256k(*newY, py);
+ __private uint256_t k;
+ subModP256k(px.v, newX->v, k.v);
+ mulModP(s.v, k.v, newY->v);
+ subModP256k(newY->v, py.v,newY->v);
} else {
- uint256_t rise;
- rise = subModP256k(py, y);
+ __private uint256_t rise;
+ subModP256k(py.v, y.v, rise.v);
- s = mulModP256k(rise, s);
+ mulModP(rise.v, s.v, s.v);
// Rx = s^2 - Gx - Qx
- uint256_t s2;
- s2 = mulModP256k(s, s);
+ __private uint256_t s2;
+ mulModP(s.v, s.v, s2.v);
- *newX = subModP256k(s2, px);
- *newX = subModP256k(*newX, x);
+ subModP256k(s2.v, px.v, newX->v);
+ subModP256k(newX->v, x.v,newX->v);
// Ry = s(px - rx) - py
- uint256_t k;
- k = subModP256k(px, *newX);
- *newY = mulModP256k(s, k);
- *newY = subModP256k(*newY, py);
+ __private uint256_t k;
+ subModP256k(px.v, newX->v, k.v);
+ mulModP(s.v, k.v, newY->v);
+ subModP256k(newY->v, py.v, newY->v);
}
}
-
-void completeBatchAdd256k(
- uint256_t px,
- uint256_t py,
- __global uint256_t* xPtr,
- __global uint256_t* yPtr,
- int i,
- int batchIdx,
- __global uint256_t* chain,
- uint256_t* inverse,
- uint256_t* newX,
- uint256_t* newY)
+unsigned int readWord256k(__global const uint256_t* ara, const int idx, const int word)
{
- int gid = get_local_size(0) * get_group_id(0) + get_local_id(0);
- int dim = get_global_size(0);
-
- uint256_t s;
- uint256_t x;
-
- x = xPtr[i];
-
- if(batchIdx >= 1) {
- uint256_t c;
-
- c = chain[(batchIdx - 1) * dim + gid];
- s = mulModP256k(*inverse, c);
-
- uint256_t diff;
- diff = subModP256k(px, x);
- *inverse = mulModP256k(diff, *inverse);
- } else {
- s = *inverse;
- }
-
- uint256_t y;
- y = yPtr[i];
-
- uint256_t rise;
- rise = subModP256k(py, y);
-
- s = mulModP256k(rise, s);
-
- // Rx = s^2 - Gx - Qx
- uint256_t s2;
- s2 = mulModP256k(s, s);
-
- *newX = subModP256k(s2, px);
- *newX = subModP256k(*newX, x);
-
- // Ry = s(px - rx) - py
- uint256_t k;
- k = subModP256k(px, *newX);
- *newY = mulModP256k(s, k);
- *newY = subModP256k(*newY, py);
-}
-
-
-uint256_t doBatchInverse256k(uint256_t x)
-{
- return invModP256k(x);
+ return ara[idx].v[word];
}
#endif
-#ifndef _SHA256_CL
-#define _SHA256_CL
-
+#ifndef SHA256_CL
+#define SHA256_CL
__constant unsigned int _K[64] = {
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
@@ -1270,7 +1048,6 @@ __constant unsigned int _IV[8] = {
#define rotr(x, n) ((x) >> (n)) ^ ((x) << (32 - (n)))
-
#define MAJ(a, b, c) (((a) & (b)) ^ ((a) & (c)) ^ ((b) & (c)))
#define CH(e, f, g) (((e) & (f)) ^ (~(e) & (g)))
@@ -1279,17 +1056,25 @@ __constant unsigned int _IV[8] = {
#define s1(x) (rotr((x), 17) ^ rotr((x), 19) ^ ((x) >> 10))
-#define round(a, b, c, d, e, f, g, h, m, k)\
+#define roundSha(a, b, c, d, e, f, g, h, m, k)\
t = CH((e), (f), (g)) + (rotr((e), 6) ^ rotr((e), 11) ^ rotr((e), 25)) + (k) + (m);\
(d) += (t) + (h);\
(h) += (t) + MAJ((a), (b), (c)) + (rotr((a), 2) ^ rotr((a), 13) ^ rotr((a), 22))
-
void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned int digest[8])
{
- unsigned int a, b, c, d, e, f, g, h;
- unsigned int w[16];
- unsigned int t;
+ __private unsigned int a, b, c, d, e, f, g, h;
+ __private unsigned int w[16];
+ __private unsigned int t;
+
+ a = _IV[0];
+ b = _IV[1];
+ c = _IV[2];
+ d = _IV[3];
+ e = _IV[4];
+ f = _IV[5];
+ g = _IV[6];
+ h = _IV[7];
// 0x04 || x || y
w[0] = (x[0] >> 8) | 0x04000000;
@@ -1309,31 +1094,22 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned
w[14] = (y[6] >> 8) | (y[5] << 24);
w[15] = (y[7] >> 8) | (y[6] << 24);
- a = _IV[0];
- b = _IV[1];
- c = _IV[2];
- d = _IV[3];
- e = _IV[4];
- f = _IV[5];
- g = _IV[6];
- h = _IV[7];
-
- round(a, b, c, d, e, f, g, h, w[0], _K[0]);
- round(h, a, b, c, d, e, f, g, w[1], _K[1]);
- round(g, h, a, b, c, d, e, f, w[2], _K[2]);
- round(f, g, h, a, b, c, d, e, w[3], _K[3]);
- round(e, f, g, h, a, b, c, d, w[4], _K[4]);
- round(d, e, f, g, h, a, b, c, w[5], _K[5]);
- round(c, d, e, f, g, h, a, b, w[6], _K[6]);
- round(b, c, d, e, f, g, h, a, w[7], _K[7]);
- round(a, b, c, d, e, f, g, h, w[8], _K[8]);
- round(h, a, b, c, d, e, f, g, w[9], _K[9]);
- round(g, h, a, b, c, d, e, f, w[10], _K[10]);
- round(f, g, h, a, b, c, d, e, w[11], _K[11]);
- round(e, f, g, h, a, b, c, d, w[12], _K[12]);
- round(d, e, f, g, h, a, b, c, w[13], _K[13]);
- round(c, d, e, f, g, h, a, b, w[14], _K[14]);
- round(b, c, d, e, f, g, h, a, w[15], _K[15]);
+ roundSha(a, b, c, d, e, f, g, h, w[0], _K[0]);
+ roundSha(h, a, b, c, d, e, f, g, w[1], _K[1]);
+ roundSha(g, h, a, b, c, d, e, f, w[2], _K[2]);
+ roundSha(f, g, h, a, b, c, d, e, w[3], _K[3]);
+ roundSha(e, f, g, h, a, b, c, d, w[4], _K[4]);
+ roundSha(d, e, f, g, h, a, b, c, w[5], _K[5]);
+ roundSha(c, d, e, f, g, h, a, b, w[6], _K[6]);
+ roundSha(b, c, d, e, f, g, h, a, w[7], _K[7]);
+ roundSha(a, b, c, d, e, f, g, h, w[8], _K[8]);
+ roundSha(h, a, b, c, d, e, f, g, w[9], _K[9]);
+ roundSha(g, h, a, b, c, d, e, f, w[10], _K[10]);
+ roundSha(f, g, h, a, b, c, d, e, w[11], _K[11]);
+ roundSha(e, f, g, h, a, b, c, d, w[12], _K[12]);
+ roundSha(d, e, f, g, h, a, b, c, w[13], _K[13]);
+ roundSha(c, d, e, f, g, h, a, b, w[14], _K[14]);
+ roundSha(b, c, d, e, f, g, h, a, w[15], _K[15]);
w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]);
@@ -1352,22 +1128,22 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned
w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
- round(a, b, c, d, e, f, g, h, w[0], _K[16]);
- round(h, a, b, c, d, e, f, g, w[1], _K[17]);
- round(g, h, a, b, c, d, e, f, w[2], _K[18]);
- round(f, g, h, a, b, c, d, e, w[3], _K[19]);
- round(e, f, g, h, a, b, c, d, w[4], _K[20]);
- round(d, e, f, g, h, a, b, c, w[5], _K[21]);
- round(c, d, e, f, g, h, a, b, w[6], _K[22]);
- round(b, c, d, e, f, g, h, a, w[7], _K[23]);
- round(a, b, c, d, e, f, g, h, w[8], _K[24]);
- round(h, a, b, c, d, e, f, g, w[9], _K[25]);
- round(g, h, a, b, c, d, e, f, w[10], _K[26]);
- round(f, g, h, a, b, c, d, e, w[11], _K[27]);
- round(e, f, g, h, a, b, c, d, w[12], _K[28]);
- round(d, e, f, g, h, a, b, c, w[13], _K[29]);
- round(c, d, e, f, g, h, a, b, w[14], _K[30]);
- round(b, c, d, e, f, g, h, a, w[15], _K[31]);
+ roundSha(a, b, c, d, e, f, g, h, w[0], _K[16]);
+ roundSha(h, a, b, c, d, e, f, g, w[1], _K[17]);
+ roundSha(g, h, a, b, c, d, e, f, w[2], _K[18]);
+ roundSha(f, g, h, a, b, c, d, e, w[3], _K[19]);
+ roundSha(e, f, g, h, a, b, c, d, w[4], _K[20]);
+ roundSha(d, e, f, g, h, a, b, c, w[5], _K[21]);
+ roundSha(c, d, e, f, g, h, a, b, w[6], _K[22]);
+ roundSha(b, c, d, e, f, g, h, a, w[7], _K[23]);
+ roundSha(a, b, c, d, e, f, g, h, w[8], _K[24]);
+ roundSha(h, a, b, c, d, e, f, g, w[9], _K[25]);
+ roundSha(g, h, a, b, c, d, e, f, w[10], _K[26]);
+ roundSha(f, g, h, a, b, c, d, e, w[11], _K[27]);
+ roundSha(e, f, g, h, a, b, c, d, w[12], _K[28]);
+ roundSha(d, e, f, g, h, a, b, c, w[13], _K[29]);
+ roundSha(c, d, e, f, g, h, a, b, w[14], _K[30]);
+ roundSha(b, c, d, e, f, g, h, a, w[15], _K[31]);
w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]);
@@ -1386,22 +1162,22 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned
w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
- round(a, b, c, d, e, f, g, h, w[0], _K[32]);
- round(h, a, b, c, d, e, f, g, w[1], _K[33]);
- round(g, h, a, b, c, d, e, f, w[2], _K[34]);
- round(f, g, h, a, b, c, d, e, w[3], _K[35]);
- round(e, f, g, h, a, b, c, d, w[4], _K[36]);
- round(d, e, f, g, h, a, b, c, w[5], _K[37]);
- round(c, d, e, f, g, h, a, b, w[6], _K[38]);
- round(b, c, d, e, f, g, h, a, w[7], _K[39]);
- round(a, b, c, d, e, f, g, h, w[8], _K[40]);
- round(h, a, b, c, d, e, f, g, w[9], _K[41]);
- round(g, h, a, b, c, d, e, f, w[10], _K[42]);
- round(f, g, h, a, b, c, d, e, w[11], _K[43]);
- round(e, f, g, h, a, b, c, d, w[12], _K[44]);
- round(d, e, f, g, h, a, b, c, w[13], _K[45]);
- round(c, d, e, f, g, h, a, b, w[14], _K[46]);
- round(b, c, d, e, f, g, h, a, w[15], _K[47]);
+ roundSha(a, b, c, d, e, f, g, h, w[0], _K[32]);
+ roundSha(h, a, b, c, d, e, f, g, w[1], _K[33]);
+ roundSha(g, h, a, b, c, d, e, f, w[2], _K[34]);
+ roundSha(f, g, h, a, b, c, d, e, w[3], _K[35]);
+ roundSha(e, f, g, h, a, b, c, d, w[4], _K[36]);
+ roundSha(d, e, f, g, h, a, b, c, w[5], _K[37]);
+ roundSha(c, d, e, f, g, h, a, b, w[6], _K[38]);
+ roundSha(b, c, d, e, f, g, h, a, w[7], _K[39]);
+ roundSha(a, b, c, d, e, f, g, h, w[8], _K[40]);
+ roundSha(h, a, b, c, d, e, f, g, w[9], _K[41]);
+ roundSha(g, h, a, b, c, d, e, f, w[10], _K[42]);
+ roundSha(f, g, h, a, b, c, d, e, w[11], _K[43]);
+ roundSha(e, f, g, h, a, b, c, d, w[12], _K[44]);
+ roundSha(d, e, f, g, h, a, b, c, w[13], _K[45]);
+ roundSha(c, d, e, f, g, h, a, b, w[14], _K[46]);
+ roundSha(b, c, d, e, f, g, h, a, w[15], _K[47]);
w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]);
@@ -1420,22 +1196,22 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned
w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
- round(a, b, c, d, e, f, g, h, w[0], _K[48]);
- round(h, a, b, c, d, e, f, g, w[1], _K[49]);
- round(g, h, a, b, c, d, e, f, w[2], _K[50]);
- round(f, g, h, a, b, c, d, e, w[3], _K[51]);
- round(e, f, g, h, a, b, c, d, w[4], _K[52]);
- round(d, e, f, g, h, a, b, c, w[5], _K[53]);
- round(c, d, e, f, g, h, a, b, w[6], _K[54]);
- round(b, c, d, e, f, g, h, a, w[7], _K[55]);
- round(a, b, c, d, e, f, g, h, w[8], _K[56]);
- round(h, a, b, c, d, e, f, g, w[9], _K[57]);
- round(g, h, a, b, c, d, e, f, w[10], _K[58]);
- round(f, g, h, a, b, c, d, e, w[11], _K[59]);
- round(e, f, g, h, a, b, c, d, w[12], _K[60]);
- round(d, e, f, g, h, a, b, c, w[13], _K[61]);
- round(c, d, e, f, g, h, a, b, w[14], _K[62]);
- round(b, c, d, e, f, g, h, a, w[15], _K[63]);
+ roundSha(a, b, c, d, e, f, g, h, w[0], _K[48]);
+ roundSha(h, a, b, c, d, e, f, g, w[1], _K[49]);
+ roundSha(g, h, a, b, c, d, e, f, w[2], _K[50]);
+ roundSha(f, g, h, a, b, c, d, e, w[3], _K[51]);
+ roundSha(e, f, g, h, a, b, c, d, w[4], _K[52]);
+ roundSha(d, e, f, g, h, a, b, c, w[5], _K[53]);
+ roundSha(c, d, e, f, g, h, a, b, w[6], _K[54]);
+ roundSha(b, c, d, e, f, g, h, a, w[7], _K[55]);
+ roundSha(a, b, c, d, e, f, g, h, w[8], _K[56]);
+ roundSha(h, a, b, c, d, e, f, g, w[9], _K[57]);
+ roundSha(g, h, a, b, c, d, e, f, w[10], _K[58]);
+ roundSha(f, g, h, a, b, c, d, e, w[11], _K[59]);
+ roundSha(e, f, g, h, a, b, c, d, w[12], _K[60]);
+ roundSha(d, e, f, g, h, a, b, c, w[13], _K[61]);
+ roundSha(c, d, e, f, g, h, a, b, w[14], _K[62]);
+ roundSha(b, c, d, e, f, g, h, a, w[15], _K[63]);
a += _IV[0];
b += _IV[1];
@@ -1447,35 +1223,34 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned
h += _IV[7];
// store the intermediate hash value
- unsigned int tmp[8];
- tmp[0] = a;
- tmp[1] = b;
- tmp[2] = c;
- tmp[3] = d;
- tmp[4] = e;
- tmp[5] = f;
- tmp[6] = g;
- tmp[7] = h;
+ digest[0] = a;
+ digest[1] = b;
+ digest[2] = c;
+ digest[3] = d;
+ digest[4] = e;
+ digest[5] = f;
+ digest[6] = g;
+ digest[7] = h;
w[0] = (y[7] << 24) | 0x00800000;
- w[15] = 65 * 8;
-
- round(a, b, c, d, e, f, g, h, w[0], _K[0]);
- round(h, a, b, c, d, e, f, g, 0, _K[1]);
- round(g, h, a, b, c, d, e, f, 0, _K[2]);
- round(f, g, h, a, b, c, d, e, 0, _K[3]);
- round(e, f, g, h, a, b, c, d, 0, _K[4]);
- round(d, e, f, g, h, a, b, c, 0, _K[5]);
- round(c, d, e, f, g, h, a, b, 0, _K[6]);
- round(b, c, d, e, f, g, h, a, 0, _K[7]);
- round(a, b, c, d, e, f, g, h, 0, _K[8]);
- round(h, a, b, c, d, e, f, g, 0, _K[9]);
- round(g, h, a, b, c, d, e, f, 0, _K[10]);
- round(f, g, h, a, b, c, d, e, 0, _K[11]);
- round(e, f, g, h, a, b, c, d, 0, _K[12]);
- round(d, e, f, g, h, a, b, c, 0, _K[13]);
- round(c, d, e, f, g, h, a, b, 0, _K[14]);
- round(b, c, d, e, f, g, h, a, w[15], _K[15]);
+ w[15] = 520; // 65 * 8
+
+ roundSha(a, b, c, d, e, f, g, h, w[0], _K[0]);
+ roundSha(h, a, b, c, d, e, f, g, 0, _K[1]);
+ roundSha(g, h, a, b, c, d, e, f, 0, _K[2]);
+ roundSha(f, g, h, a, b, c, d, e, 0, _K[3]);
+ roundSha(e, f, g, h, a, b, c, d, 0, _K[4]);
+ roundSha(d, e, f, g, h, a, b, c, 0, _K[5]);
+ roundSha(c, d, e, f, g, h, a, b, 0, _K[6]);
+ roundSha(b, c, d, e, f, g, h, a, 0, _K[7]);
+ roundSha(a, b, c, d, e, f, g, h, 0, _K[8]);
+ roundSha(h, a, b, c, d, e, f, g, 0, _K[9]);
+ roundSha(g, h, a, b, c, d, e, f, 0, _K[10]);
+ roundSha(f, g, h, a, b, c, d, e, 0, _K[11]);
+ roundSha(e, f, g, h, a, b, c, d, 0, _K[12]);
+ roundSha(d, e, f, g, h, a, b, c, 0, _K[13]);
+ roundSha(c, d, e, f, g, h, a, b, 0, _K[14]);
+ roundSha(b, c, d, e, f, g, h, a, w[15], _K[15]);
w[0] = w[0] + s0(0) + 0 + s1(0);
w[1] = 0 + s0(0) + 0 + s1(w[15]);
@@ -1494,22 +1269,22 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned
w[14] = 0 + s0(w[15]) + w[7] + s1(w[12]);
w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
- round(a, b, c, d, e, f, g, h, w[0], _K[16]);
- round(h, a, b, c, d, e, f, g, w[1], _K[17]);
- round(g, h, a, b, c, d, e, f, w[2], _K[18]);
- round(f, g, h, a, b, c, d, e, w[3], _K[19]);
- round(e, f, g, h, a, b, c, d, w[4], _K[20]);
- round(d, e, f, g, h, a, b, c, w[5], _K[21]);
- round(c, d, e, f, g, h, a, b, w[6], _K[22]);
- round(b, c, d, e, f, g, h, a, w[7], _K[23]);
- round(a, b, c, d, e, f, g, h, w[8], _K[24]);
- round(h, a, b, c, d, e, f, g, w[9], _K[25]);
- round(g, h, a, b, c, d, e, f, w[10], _K[26]);
- round(f, g, h, a, b, c, d, e, w[11], _K[27]);
- round(e, f, g, h, a, b, c, d, w[12], _K[28]);
- round(d, e, f, g, h, a, b, c, w[13], _K[29]);
- round(c, d, e, f, g, h, a, b, w[14], _K[30]);
- round(b, c, d, e, f, g, h, a, w[15], _K[31]);
+ roundSha(a, b, c, d, e, f, g, h, w[0], _K[16]);
+ roundSha(h, a, b, c, d, e, f, g, w[1], _K[17]);
+ roundSha(g, h, a, b, c, d, e, f, w[2], _K[18]);
+ roundSha(f, g, h, a, b, c, d, e, w[3], _K[19]);
+ roundSha(e, f, g, h, a, b, c, d, w[4], _K[20]);
+ roundSha(d, e, f, g, h, a, b, c, w[5], _K[21]);
+ roundSha(c, d, e, f, g, h, a, b, w[6], _K[22]);
+ roundSha(b, c, d, e, f, g, h, a, w[7], _K[23]);
+ roundSha(a, b, c, d, e, f, g, h, w[8], _K[24]);
+ roundSha(h, a, b, c, d, e, f, g, w[9], _K[25]);
+ roundSha(g, h, a, b, c, d, e, f, w[10], _K[26]);
+ roundSha(f, g, h, a, b, c, d, e, w[11], _K[27]);
+ roundSha(e, f, g, h, a, b, c, d, w[12], _K[28]);
+ roundSha(d, e, f, g, h, a, b, c, w[13], _K[29]);
+ roundSha(c, d, e, f, g, h, a, b, w[14], _K[30]);
+ roundSha(b, c, d, e, f, g, h, a, w[15], _K[31]);
w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]);
@@ -1528,22 +1303,22 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned
w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
- round(a, b, c, d, e, f, g, h, w[0], _K[32]);
- round(h, a, b, c, d, e, f, g, w[1], _K[33]);
- round(g, h, a, b, c, d, e, f, w[2], _K[34]);
- round(f, g, h, a, b, c, d, e, w[3], _K[35]);
- round(e, f, g, h, a, b, c, d, w[4], _K[36]);
- round(d, e, f, g, h, a, b, c, w[5], _K[37]);
- round(c, d, e, f, g, h, a, b, w[6], _K[38]);
- round(b, c, d, e, f, g, h, a, w[7], _K[39]);
- round(a, b, c, d, e, f, g, h, w[8], _K[40]);
- round(h, a, b, c, d, e, f, g, w[9], _K[41]);
- round(g, h, a, b, c, d, e, f, w[10], _K[42]);
- round(f, g, h, a, b, c, d, e, w[11], _K[43]);
- round(e, f, g, h, a, b, c, d, w[12], _K[44]);
- round(d, e, f, g, h, a, b, c, w[13], _K[45]);
- round(c, d, e, f, g, h, a, b, w[14], _K[46]);
- round(b, c, d, e, f, g, h, a, w[15], _K[47]);
+ roundSha(a, b, c, d, e, f, g, h, w[0], _K[32]);
+ roundSha(h, a, b, c, d, e, f, g, w[1], _K[33]);
+ roundSha(g, h, a, b, c, d, e, f, w[2], _K[34]);
+ roundSha(f, g, h, a, b, c, d, e, w[3], _K[35]);
+ roundSha(e, f, g, h, a, b, c, d, w[4], _K[36]);
+ roundSha(d, e, f, g, h, a, b, c, w[5], _K[37]);
+ roundSha(c, d, e, f, g, h, a, b, w[6], _K[38]);
+ roundSha(b, c, d, e, f, g, h, a, w[7], _K[39]);
+ roundSha(a, b, c, d, e, f, g, h, w[8], _K[40]);
+ roundSha(h, a, b, c, d, e, f, g, w[9], _K[41]);
+ roundSha(g, h, a, b, c, d, e, f, w[10], _K[42]);
+ roundSha(f, g, h, a, b, c, d, e, w[11], _K[43]);
+ roundSha(e, f, g, h, a, b, c, d, w[12], _K[44]);
+ roundSha(d, e, f, g, h, a, b, c, w[13], _K[45]);
+ roundSha(c, d, e, f, g, h, a, b, w[14], _K[46]);
+ roundSha(b, c, d, e, f, g, h, a, w[15], _K[47]);
w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]);
@@ -1562,38 +1337,38 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned
w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
- round(a, b, c, d, e, f, g, h, w[0], _K[48]);
- round(h, a, b, c, d, e, f, g, w[1], _K[49]);
- round(g, h, a, b, c, d, e, f, w[2], _K[50]);
- round(f, g, h, a, b, c, d, e, w[3], _K[51]);
- round(e, f, g, h, a, b, c, d, w[4], _K[52]);
- round(d, e, f, g, h, a, b, c, w[5], _K[53]);
- round(c, d, e, f, g, h, a, b, w[6], _K[54]);
- round(b, c, d, e, f, g, h, a, w[7], _K[55]);
- round(a, b, c, d, e, f, g, h, w[8], _K[56]);
- round(h, a, b, c, d, e, f, g, w[9], _K[57]);
- round(g, h, a, b, c, d, e, f, w[10], _K[58]);
- round(f, g, h, a, b, c, d, e, w[11], _K[59]);
- round(e, f, g, h, a, b, c, d, w[12], _K[60]);
- round(d, e, f, g, h, a, b, c, w[13], _K[61]);
- round(c, d, e, f, g, h, a, b, w[14], _K[62]);
- round(b, c, d, e, f, g, h, a, w[15], _K[63]);
-
- digest[0] = tmp[0] + a;
- digest[1] = tmp[1] + b;
- digest[2] = tmp[2] + c;
- digest[3] = tmp[3] + d;
- digest[4] = tmp[4] + e;
- digest[5] = tmp[5] + f;
- digest[6] = tmp[6] + g;
- digest[7] = tmp[7] + h;
+ roundSha(a, b, c, d, e, f, g, h, w[0], _K[48]);
+ roundSha(h, a, b, c, d, e, f, g, w[1], _K[49]);
+ roundSha(g, h, a, b, c, d, e, f, w[2], _K[50]);
+ roundSha(f, g, h, a, b, c, d, e, w[3], _K[51]);
+ roundSha(e, f, g, h, a, b, c, d, w[4], _K[52]);
+ roundSha(d, e, f, g, h, a, b, c, w[5], _K[53]);
+ roundSha(c, d, e, f, g, h, a, b, w[6], _K[54]);
+ roundSha(b, c, d, e, f, g, h, a, w[7], _K[55]);
+ roundSha(a, b, c, d, e, f, g, h, w[8], _K[56]);
+ roundSha(h, a, b, c, d, e, f, g, w[9], _K[57]);
+ roundSha(g, h, a, b, c, d, e, f, w[10], _K[58]);
+ roundSha(f, g, h, a, b, c, d, e, w[11], _K[59]);
+ roundSha(e, f, g, h, a, b, c, d, w[12], _K[60]);
+ roundSha(d, e, f, g, h, a, b, c, w[13], _K[61]);
+ roundSha(c, d, e, f, g, h, a, b, w[14], _K[62]);
+ roundSha(b, c, d, e, f, g, h, a, w[15], _K[63]);
+
+ digest[0] += a;
+ digest[1] += b;
+ digest[2] += c;
+ digest[3] += d;
+ digest[4] += e;
+ digest[5] += f;
+ digest[6] += g;
+ digest[7] += h;
}
void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, unsigned int digest[8])
{
- unsigned int a, b, c, d, e, f, g, h;
- unsigned int w[16];
- unsigned int t;
+ __private unsigned int a, b, c, d, e, f, g, h;
+ __private unsigned int w[16];
+ __private unsigned int t;
// 0x03 || x or 0x02 || x
w[0] = 0x02000000 | ((yParity & 1) << 24) | (x[0] >> 8);
@@ -1606,7 +1381,7 @@ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, un
w[6] = (x[6] >> 8) | (x[5] << 24);
w[7] = (x[7] >> 8) | (x[6] << 24);
w[8] = (x[7] << 24) | 0x00800000;
- w[15] = 33 * 8;
+ w[15] = 264; // 33 * 8
a = _IV[0];
b = _IV[1];
@@ -1617,22 +1392,22 @@ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, un
g = _IV[6];
h = _IV[7];
- round(a, b, c, d, e, f, g, h, w[0], _K[0]);
- round(h, a, b, c, d, e, f, g, w[1], _K[1]);
- round(g, h, a, b, c, d, e, f, w[2], _K[2]);
- round(f, g, h, a, b, c, d, e, w[3], _K[3]);
- round(e, f, g, h, a, b, c, d, w[4], _K[4]);
- round(d, e, f, g, h, a, b, c, w[5], _K[5]);
- round(c, d, e, f, g, h, a, b, w[6], _K[6]);
- round(b, c, d, e, f, g, h, a, w[7], _K[7]);
- round(a, b, c, d, e, f, g, h, w[8], _K[8]);
- round(h, a, b, c, d, e, f, g, 0, _K[9]);
- round(g, h, a, b, c, d, e, f, 0, _K[10]);
- round(f, g, h, a, b, c, d, e, 0, _K[11]);
- round(e, f, g, h, a, b, c, d, 0, _K[12]);
- round(d, e, f, g, h, a, b, c, 0, _K[13]);
- round(c, d, e, f, g, h, a, b, 0, _K[14]);
- round(b, c, d, e, f, g, h, a, w[15], _K[15]);
+ roundSha(a, b, c, d, e, f, g, h, w[0], _K[0]);
+ roundSha(h, a, b, c, d, e, f, g, w[1], _K[1]);
+ roundSha(g, h, a, b, c, d, e, f, w[2], _K[2]);
+ roundSha(f, g, h, a, b, c, d, e, w[3], _K[3]);
+ roundSha(e, f, g, h, a, b, c, d, w[4], _K[4]);
+ roundSha(d, e, f, g, h, a, b, c, w[5], _K[5]);
+ roundSha(c, d, e, f, g, h, a, b, w[6], _K[6]);
+ roundSha(b, c, d, e, f, g, h, a, w[7], _K[7]);
+ roundSha(a, b, c, d, e, f, g, h, w[8], _K[8]);
+ roundSha(h, a, b, c, d, e, f, g, 0, _K[9]);
+ roundSha(g, h, a, b, c, d, e, f, 0, _K[10]);
+ roundSha(f, g, h, a, b, c, d, e, 0, _K[11]);
+ roundSha(e, f, g, h, a, b, c, d, 0, _K[12]);
+ roundSha(d, e, f, g, h, a, b, c, 0, _K[13]);
+ roundSha(c, d, e, f, g, h, a, b, 0, _K[14]);
+ roundSha(b, c, d, e, f, g, h, a, w[15], _K[15]);
w[0] = w[0] + s0(w[1]) + 0 + s1(0);
w[1] = w[1] + s0(w[2]) + 0 + s1(w[15]);
@@ -1651,22 +1426,22 @@ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, un
w[14] = 0 + s0(w[15]) + w[7] + s1(w[12]);
w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
- round(a, b, c, d, e, f, g, h, w[0], _K[16]);
- round(h, a, b, c, d, e, f, g, w[1], _K[17]);
- round(g, h, a, b, c, d, e, f, w[2], _K[18]);
- round(f, g, h, a, b, c, d, e, w[3], _K[19]);
- round(e, f, g, h, a, b, c, d, w[4], _K[20]);
- round(d, e, f, g, h, a, b, c, w[5], _K[21]);
- round(c, d, e, f, g, h, a, b, w[6], _K[22]);
- round(b, c, d, e, f, g, h, a, w[7], _K[23]);
- round(a, b, c, d, e, f, g, h, w[8], _K[24]);
- round(h, a, b, c, d, e, f, g, w[9], _K[25]);
- round(g, h, a, b, c, d, e, f, w[10], _K[26]);
- round(f, g, h, a, b, c, d, e, w[11], _K[27]);
- round(e, f, g, h, a, b, c, d, w[12], _K[28]);
- round(d, e, f, g, h, a, b, c, w[13], _K[29]);
- round(c, d, e, f, g, h, a, b, w[14], _K[30]);
- round(b, c, d, e, f, g, h, a, w[15], _K[31]);
+ roundSha(a, b, c, d, e, f, g, h, w[0], _K[16]);
+ roundSha(h, a, b, c, d, e, f, g, w[1], _K[17]);
+ roundSha(g, h, a, b, c, d, e, f, w[2], _K[18]);
+ roundSha(f, g, h, a, b, c, d, e, w[3], _K[19]);
+ roundSha(e, f, g, h, a, b, c, d, w[4], _K[20]);
+ roundSha(d, e, f, g, h, a, b, c, w[5], _K[21]);
+ roundSha(c, d, e, f, g, h, a, b, w[6], _K[22]);
+ roundSha(b, c, d, e, f, g, h, a, w[7], _K[23]);
+ roundSha(a, b, c, d, e, f, g, h, w[8], _K[24]);
+ roundSha(h, a, b, c, d, e, f, g, w[9], _K[25]);
+ roundSha(g, h, a, b, c, d, e, f, w[10], _K[26]);
+ roundSha(f, g, h, a, b, c, d, e, w[11], _K[27]);
+ roundSha(e, f, g, h, a, b, c, d, w[12], _K[28]);
+ roundSha(d, e, f, g, h, a, b, c, w[13], _K[29]);
+ roundSha(c, d, e, f, g, h, a, b, w[14], _K[30]);
+ roundSha(b, c, d, e, f, g, h, a, w[15], _K[31]);
w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]);
@@ -1685,22 +1460,22 @@ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, un
w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
- round(a, b, c, d, e, f, g, h, w[0], _K[32]);
- round(h, a, b, c, d, e, f, g, w[1], _K[33]);
- round(g, h, a, b, c, d, e, f, w[2], _K[34]);
- round(f, g, h, a, b, c, d, e, w[3], _K[35]);
- round(e, f, g, h, a, b, c, d, w[4], _K[36]);
- round(d, e, f, g, h, a, b, c, w[5], _K[37]);
- round(c, d, e, f, g, h, a, b, w[6], _K[38]);
- round(b, c, d, e, f, g, h, a, w[7], _K[39]);
- round(a, b, c, d, e, f, g, h, w[8], _K[40]);
- round(h, a, b, c, d, e, f, g, w[9], _K[41]);
- round(g, h, a, b, c, d, e, f, w[10], _K[42]);
- round(f, g, h, a, b, c, d, e, w[11], _K[43]);
- round(e, f, g, h, a, b, c, d, w[12], _K[44]);
- round(d, e, f, g, h, a, b, c, w[13], _K[45]);
- round(c, d, e, f, g, h, a, b, w[14], _K[46]);
- round(b, c, d, e, f, g, h, a, w[15], _K[47]);
+ roundSha(a, b, c, d, e, f, g, h, w[0], _K[32]);
+ roundSha(h, a, b, c, d, e, f, g, w[1], _K[33]);
+ roundSha(g, h, a, b, c, d, e, f, w[2], _K[34]);
+ roundSha(f, g, h, a, b, c, d, e, w[3], _K[35]);
+ roundSha(e, f, g, h, a, b, c, d, w[4], _K[36]);
+ roundSha(d, e, f, g, h, a, b, c, w[5], _K[37]);
+ roundSha(c, d, e, f, g, h, a, b, w[6], _K[38]);
+ roundSha(b, c, d, e, f, g, h, a, w[7], _K[39]);
+ roundSha(a, b, c, d, e, f, g, h, w[8], _K[40]);
+ roundSha(h, a, b, c, d, e, f, g, w[9], _K[41]);
+ roundSha(g, h, a, b, c, d, e, f, w[10], _K[42]);
+ roundSha(f, g, h, a, b, c, d, e, w[11], _K[43]);
+ roundSha(e, f, g, h, a, b, c, d, w[12], _K[44]);
+ roundSha(d, e, f, g, h, a, b, c, w[13], _K[45]);
+ roundSha(c, d, e, f, g, h, a, b, w[14], _K[46]);
+ roundSha(b, c, d, e, f, g, h, a, w[15], _K[47]);
w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
@@ -1720,139 +1495,150 @@ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, un
w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
- round(a, b, c, d, e, f, g, h, w[0], _K[48]);
- round(h, a, b, c, d, e, f, g, w[1], _K[49]);
- round(g, h, a, b, c, d, e, f, w[2], _K[50]);
- round(f, g, h, a, b, c, d, e, w[3], _K[51]);
- round(e, f, g, h, a, b, c, d, w[4], _K[52]);
- round(d, e, f, g, h, a, b, c, w[5], _K[53]);
- round(c, d, e, f, g, h, a, b, w[6], _K[54]);
- round(b, c, d, e, f, g, h, a, w[7], _K[55]);
- round(a, b, c, d, e, f, g, h, w[8], _K[56]);
- round(h, a, b, c, d, e, f, g, w[9], _K[57]);
- round(g, h, a, b, c, d, e, f, w[10], _K[58]);
- round(f, g, h, a, b, c, d, e, w[11], _K[59]);
- round(e, f, g, h, a, b, c, d, w[12], _K[60]);
- round(d, e, f, g, h, a, b, c, w[13], _K[61]);
- round(c, d, e, f, g, h, a, b, w[14], _K[62]);
- round(b, c, d, e, f, g, h, a, w[15], _K[63]);
-
- a += _IV[0];
- b += _IV[1];
- c += _IV[2];
- d += _IV[3];
- e += _IV[4];
- f += _IV[5];
- g += _IV[6];
- h += _IV[7];
-
- digest[0] = a;
- digest[1] = b;
- digest[2] = c;
- digest[3] = d;
- digest[4] = e;
- digest[5] = f;
- digest[6] = g;
- digest[7] = h;
+ roundSha(a, b, c, d, e, f, g, h, w[0], _K[48]);
+ roundSha(h, a, b, c, d, e, f, g, w[1], _K[49]);
+ roundSha(g, h, a, b, c, d, e, f, w[2], _K[50]);
+ roundSha(f, g, h, a, b, c, d, e, w[3], _K[51]);
+ roundSha(e, f, g, h, a, b, c, d, w[4], _K[52]);
+ roundSha(d, e, f, g, h, a, b, c, w[5], _K[53]);
+ roundSha(c, d, e, f, g, h, a, b, w[6], _K[54]);
+ roundSha(b, c, d, e, f, g, h, a, w[7], _K[55]);
+ roundSha(a, b, c, d, e, f, g, h, w[8], _K[56]);
+ roundSha(h, a, b, c, d, e, f, g, w[9], _K[57]);
+ roundSha(g, h, a, b, c, d, e, f, w[10], _K[58]);
+ roundSha(f, g, h, a, b, c, d, e, w[11], _K[59]);
+ roundSha(e, f, g, h, a, b, c, d, w[12], _K[60]);
+ roundSha(d, e, f, g, h, a, b, c, w[13], _K[61]);
+ roundSha(c, d, e, f, g, h, a, b, w[14], _K[62]);
+ roundSha(b, c, d, e, f, g, h, a, w[15], _K[63]);
+
+ digest[0] = a + _IV[0];
+ digest[1] = b + _IV[1];
+ digest[2] = c + _IV[2];
+ digest[3] = d + _IV[3];
+ digest[4] = e + _IV[4];
+ digest[5] = f + _IV[5];
+ digest[6] = g + _IV[6];
+ digest[7] = h + _IV[7];
}
#endif
-#define COMPRESSED 0
-#define UNCOMPRESSED 1
-#define BOTH 2
-
-unsigned int endian(unsigned int x)
-{
- return (x << 24) | ((x << 8) & 0x00ff0000) | ((x >> 8) & 0x0000ff00) | (x >> 24);
-}
+#ifndef BITCOIN_CL
+#define BITCOIN_CL
-typedef struct {
- int idx;
- bool compressed;
- unsigned int x[8];
- unsigned int y[8];
- unsigned int digest[5];
-}CLDeviceResult;
+#ifndef endian // 32-bit byte-order reversal; x is expanded four times, so pass only side-effect-free expressions
+#define endian(x) (((x) << 24) | (((x) << 8) & 0x00ff0000) | (((x) >> 8) & 0x0000ff00) | ((x) >> 24)) // outer parentheses keep the result intact inside larger expressions
+#endif
-bool isInList(unsigned int hash[5], __global unsigned int *targetList, size_t numTargets)
+void hashPublicKeyCompressed(const uint256_t x, const unsigned int yParity, unsigned int digest[5]) // SHA-256 of the compressed key, byte-swap, then ripemd160sha256NoFinal into digest
{
- bool found = false;
+ __private unsigned int hash[8]; // receives the eight SHA-256 output words
- for(size_t i = 0; i < numTargets; i++) {
- int equal = 0;
-
- for(int j = 0; j < 5; j++) {
- if(hash[j] == targetList[5 * i + j]) {
- equal++;
- }
- }
-
- if(equal == 5) {
- found = true;
- }
- }
+ sha256PublicKeyCompressed(x.v, yParity, hash); // hashes the prefix byte 0x02/0x03 (selected by yParity & 1) followed by x
- return found;
+ // Swap to little-endian
+ hash[0] = endian(hash[0]);
+ hash[1] = endian(hash[1]);
+ hash[2] = endian(hash[2]);
+ hash[3] = endian(hash[3]);
+ hash[4] = endian(hash[4]);
+ hash[5] = endian(hash[5]);
+ hash[6] = endian(hash[6]);
+ hash[7] = endian(hash[7]);
+
+ ripemd160sha256NoFinal(hash, digest); // NOTE(review): "NoFinal" suggests the last RIPEMD-160 step is deferred; setResultFound later passes digest to ripemd160FinalRound - confirm
}
-bool isInBloomFilter(unsigned int hash[5], __global unsigned int *targetList, ulong mask)
+void hashPublicKey(const uint256_t x, const uint256_t y, unsigned int digest[5]) // SHA-256 of the uncompressed key, byte-swap, then ripemd160sha256NoFinal into digest
{
- bool foundMatch = true;
+ __private unsigned int hash[8]; // receives the eight SHA-256 output words
- unsigned int h5 = 0;
-
- for(int i = 0; i < 5; i++) {
- h5 += hash[i];
- }
-
- uint64_t idx[5];
-
- idx[0] = ((hash[0] << 6) | (h5 & 0x3f)) & mask;
- idx[1] = ((hash[1] << 6) | ((h5 >> 6) & 0x3f)) & mask;
- idx[2] = ((hash[2] << 6) | ((h5 >> 12) & 0x3f)) & mask;
- idx[3] = ((hash[3] << 6) | ((h5 >> 18) & 0x3f)) & mask;
- idx[4] = ((hash[4] << 6) | ((h5 >> 24) & 0x3f)) & mask;
-
- for(int i = 0; i < 5; i++) {
- unsigned int j = idx[i];
- unsigned int f = targetList[j / 32];
-
- if((f & (0x01 << (j % 32))) == 0) {
- foundMatch = false;
- }
- }
+ sha256PublicKey(x.v, y.v, hash); // hashes the 65-byte message 0x04 || x || y
- return foundMatch;
+ // Swap to little-endian
+ hash[0] = endian(hash[0]);
+ hash[1] = endian(hash[1]);
+ hash[2] = endian(hash[2]);
+ hash[3] = endian(hash[3]);
+ hash[4] = endian(hash[4]);
+ hash[5] = endian(hash[5]);
+ hash[6] = endian(hash[6]);
+ hash[7] = endian(hash[7]);
+
+ ripemd160sha256NoFinal(hash, digest); // NOTE(review): "NoFinal" suggests the last RIPEMD-160 step is deferred; setResultFound later passes digest to ripemd160FinalRound - confirm
}
-bool checkHash(unsigned int hash[5], __global unsigned int *targetList, size_t numTargets, ulong mask)
+#endif
+#ifndef BLOOMFILTER_CL
+#define BLOOMFILTER_CL
+
+bool isInBloomFilter(const unsigned int hash[5], __global unsigned int *targetList, const ulong *mask) // probes five bits of the bit array in targetList; true only if all five are set
{
- if(numTargets > 16) {
- return isInBloomFilter(hash, targetList, mask);
- } else {
- return isInList(hash, targetList, numTargets);
- }
+ unsigned int h5 = hash[0] + hash[1] + hash[2] + hash[3] + hash[4]; // wrapping sum of the five words; successive 6-bit slices of it form the low bits of each probe index
+
+ return (false == // i.e. true unless at least one probed bit below is zero
+ ( // probe k: index = ((hash[k] << 6) | ((h5 >> 6*k) & 0x3f)) & *mask, word = index / 32, bit = index % 32
+ (targetList[(((hash[0] << 6) | (h5 & 0x3f)) & *mask) / 32] & (0x01 << ((((hash[0] << 6) | (h5 & 0x3f)) & *mask) % 32))) == 0 ||
+ (targetList[(((hash[1] << 6) | ((h5 >> 6) & 0x3f)) & *mask) / 32] & (0x01 << ((((hash[1] << 6) | ((h5 >> 6) & 0x3f)) & *mask) % 32))) == 0 ||
+ (targetList[(((hash[2] << 6) | ((h5 >> 12) & 0x3f)) & *mask) / 32] & (0x01 << ((((hash[2] << 6) | ((h5 >> 12) & 0x3f)) & *mask) % 32))) == 0 ||
+ (targetList[(((hash[3] << 6) | ((h5 >> 18) & 0x3f)) & *mask) / 32] & (0x01 << ((((hash[3] << 6) | ((h5 >> 18) & 0x3f)) & *mask) % 32))) == 0 ||
+ (targetList[ (((hash[4] << 6) | ((h5 >> 24) & 0x3f)) & *mask) / 32] & (0x01 << ( (((hash[4] << 6) | ((h5 >> 24) & 0x3f)) & *mask) % 32))) == 0
+ )
+ );
}
+#endif
+#define COMPRESSED 0
+#define UNCOMPRESSED 1
+#define BOTH 2
-void doRMD160FinalRound(const unsigned int hIn[5], unsigned int hOut[5])
-{
- const unsigned int iv[5] = {
- 0x67452301,
- 0xefcdab89,
- 0x98badcfe,
- 0x10325476,
- 0xc3d2e1f0
- };
+typedef struct { // one match record; setResultFound fills it and stores it in the __global results array
+ int idx; // the idx argument given to setResultFound
+ bool compressed; // NOTE(review): bool inside a __global buffer that is presumably read back by the host - confirm the host-side struct uses a matching size and layout
+ unsigned int x[8]; // copy of x.v[0..7]
+ unsigned int y[8]; // copy of y.v[0..7]
+ unsigned int digest[5]; // written by ripemd160FinalRound in setResultFound
+}CLDeviceResult;
- for(int i = 0; i < 5; i++) {
- hOut[i] = endian(hIn[i] + iv[(i + 1) % 5]);
- }
-}
+void setResultFound( // builds a CLDeviceResult for one match and appends it to the global results array
+ const int idx, // stored as r.idx; the step kernels pass their loop index i
+ const bool compressed, // stored as r.compressed; the kernels pass true for the compressed-key hash, false for the uncompressed one
+ const uint256_t x, // words x.v[0..7] are copied into r.x
+ const uint256_t y, // words y.v[0..7] are copied into r.y
+ const unsigned int digest[5], // input to ripemd160FinalRound below; not modified here
+ __global CLDeviceResult* results, // output array; its capacity is not checked in this function
+ __global unsigned int* numResults // shared counter, incremented atomically once per stored result
+) {
+ CLDeviceResult r;
+
+ r.idx = idx;
+ r.compressed = compressed;
+ r.x[0] = x.v[0];
+ r.x[1] = x.v[1];
+ r.x[2] = x.v[2];
+ r.x[3] = x.v[3];
+ r.x[4] = x.v[4];
+ r.x[5] = x.v[5];
+ r.x[6] = x.v[6];
+ r.x[7] = x.v[7];
+
+ r.y[0] = y.v[0];
+ r.y[1] = y.v[1];
+ r.y[2] = y.v[2];
+ r.y[3] = y.v[3];
+ r.y[4] = y.v[4];
+ r.y[5] = y.v[5];
+ r.y[6] = y.v[6];
+ r.y[7] = y.v[7];
+
+ ripemd160FinalRound(digest, r.digest); // writes r.digest from digest (ripemd160FinalRound is defined elsewhere)
+
+ results[atomic_add(numResults, 1)] = r; // atomic_add returns the pre-increment count, so each call gets its own slot
+}
-__kernel void multiplyStepKernel(
- int totalPoints,
- int step,
+__kernel void _initKeysKernel(
+ const unsigned int totalPoints,
+ const unsigned int step,
__global uint256_t* privateKeys,
__global uint256_t* chain,
__global uint256_t* gxPtr,
@@ -1862,51 +1648,33 @@ __kernel void multiplyStepKernel(
{
uint256_t gx;
uint256_t gy;
- int gid = get_local_size(0) * get_group_id(0) + get_local_id(0);
+ int i = get_local_size(0) * get_group_id(0) + get_local_id(0);
int dim = get_global_size(0);
gx = gxPtr[step];
gy = gyPtr[step];
- // Multiply together all (_Gx - x) and then invert
uint256_t inverse = { {0,0,0,0,0,0,0,1} };
int batchIdx = 0;
- int i = gid;
- for(; i < totalPoints; i += dim) {
-
- unsigned int p;
- p = readWord256k(privateKeys, i, 7 - step / 32);
-
- unsigned int bit = p & (1 << (step % 32));
-
- uint256_t x = xPtr[i];
- if(bit != 0) {
- if(!isInfinity256k(x)) {
+ for(; i < totalPoints; i += dim) {
+ if(( (readWord256k(privateKeys, i, 7 - step / 32)) & (1 << (step % 32))) != 0) {
+ if(!isInfinity256k(xPtr[i].v)) {
beginBatchAddWithDouble256k(gx, gy, xPtr, chain, i, batchIdx, &inverse);
batchIdx++;
}
}
}
- //doBatchInverse(inverse);
- inverse = doBatchInverse256k(inverse);
+ doBatchInverse256k(inverse.v);
+ uint256_t newX;
+ uint256_t newY;
i -= dim;
for(; i >= 0; i -= dim) {
- uint256_t newX;
- uint256_t newY;
-
- unsigned int p;
- p = readWord256k(privateKeys, i, 7 - step / 32);
- unsigned int bit = p & (1 << (step % 32));
-
- uint256_t x = xPtr[i];
- bool infinity = isInfinity256k(x);
-
- if(bit != 0) {
- if(!infinity) {
+ if(((readWord256k(privateKeys, i, 7 - step / 32)) & (1 << (step % 32))) != 0) {
+ if(!isInfinity256k(xPtr[i].v)) {
batchIdx--;
completeBatchAddWithDouble256k(gx, gy, xPtr, yPtr, i, batchIdx, chain, &inverse, &newX, &newY);
} else {
@@ -1920,75 +1688,19 @@ __kernel void multiplyStepKernel(
}
}
-
-void hashPublicKey(uint256_t x, uint256_t y, unsigned int* digestOut)
-{
- unsigned int hash[8];
-
- sha256PublicKey(x.v, y.v, hash);
-
- // Swap to little-endian
- for(int i = 0; i < 8; i++) {
- hash[i] = endian(hash[i]);
- }
-
- ripemd160sha256NoFinal(hash, digestOut);
-}
-
-void hashPublicKeyCompressed(uint256_t x, unsigned int yParity, unsigned int* digestOut)
-{
- unsigned int hash[8];
-
- sha256PublicKeyCompressed(x.v, yParity, hash);
-
- // Swap to little-endian
- for(int i = 0; i < 8; i++) {
- hash[i] = endian(hash[i]);
- }
-
- ripemd160sha256NoFinal(hash, digestOut);
-
-}
-
-void atomicListAdd(__global CLDeviceResult *results, __global unsigned int *numResults, CLDeviceResult *r)
-{
- unsigned int count = atomic_add(numResults, 1);
-
- results[count] = *r;
-}
-
-void setResultFound(int idx, bool compressed, uint256_t x, uint256_t y, unsigned int digest[5], __global CLDeviceResult* results, __global unsigned int* numResults)
-{
- CLDeviceResult r;
-
- r.idx = idx;
- r.compressed = compressed;
-
- for(int i = 0; i < 8; i++) {
- r.x[i] = x.v[i];
- r.y[i] = y.v[i];
- }
-
- doRMD160FinalRound(digest, r.digest);
-
- atomicListAdd(results, numResults, &r);
-}
-
-void doIteration(
- size_t totalPoints,
- int compression,
+__kernel void _stepKernel(
+ const unsigned int totalPoints,
__global uint256_t* chain,
__global uint256_t* xPtr,
__global uint256_t* yPtr,
__global uint256_t* incXPtr,
__global uint256_t* incYPtr,
- __global unsigned int *targetList,
- size_t numTargets,
- ulong mask,
+ __global unsigned int* targetList,
+ const ulong mask,
__global CLDeviceResult *results,
__global unsigned int *numResults)
{
- int gid = get_local_size(0) * get_group_id(0) + get_local_id(0);
+ int i = get_local_size(0) * get_group_id(0) + get_local_id(0);
int dim = get_global_size(0);
uint256_t incX = *incXPtr;
@@ -1996,48 +1708,35 @@ void doIteration(
// Multiply together all (_Gx - x) and then invert
uint256_t inverse = { {0,0,0,0,0,0,0,1} };
- int i = gid;
int batchIdx = 0;
- for(; i < totalPoints; i += dim) {
- uint256_t x;
-
- unsigned int digest[5];
-
- x = xPtr[i];
-
- if((compression == UNCOMPRESSED) || (compression == BOTH)) {
- uint256_t y = yPtr[i];
-
- hashPublicKey(x, y, digest);
+ unsigned int digest[5];
- if(checkHash(digest, targetList, numTargets, mask)) {
- setResultFound(i, false, x, y, digest, results, numResults);
- }
+ for(; i < totalPoints; i += dim) {
+
+#if defined(COMPRESSION_UNCOMPRESSED) || defined(COMPRESSION_BOTH)
+ hashPublicKey(xPtr[i], yPtr[i], digest);
+ if(isInBloomFilter(digest, targetList, &mask)) {
+ setResultFound(i, false, xPtr[i], yPtr[i], digest, results, numResults);
}
-
- if((compression == COMPRESSED) || (compression == BOTH)) {
-
- hashPublicKeyCompressed(x, readLSW256k(yPtr, i), digest);
-
- if(checkHash(digest, targetList, numTargets, mask)) {
- uint256_t y = yPtr[i];
- setResultFound(i, true, x, y, digest, results, numResults);
- }
+#endif
+#if defined(COMPRESSION_COMPRESSED) || defined(COMPRESSION_BOTH)
+ hashPublicKeyCompressed(xPtr[i], yPtr[i].v[7], digest);
+ if(isInBloomFilter(digest, targetList, &mask)) {
+ setResultFound(i, true, xPtr[i], yPtr[i], digest, results, numResults);
}
-
- beginBatchAdd256k(incX, x, chain, i, batchIdx, &inverse);
+#endif
+ beginBatchAdd256k(incX, xPtr[i], chain, i, batchIdx, &inverse);
batchIdx++;
}
- inverse = doBatchInverse256k(inverse);
+ doBatchInverse256k(inverse.v);
i -= dim;
-
+ uint256_t newX;
+ uint256_t newY;
for(; i >= 0; i -= dim) {
- uint256_t newX;
- uint256_t newY;
batchIdx--;
completeBatchAdd256k(incX, incY, xPtr, yPtr, i, batchIdx, chain, &inverse, &newX, &newY);
@@ -2046,22 +1745,19 @@ void doIteration(
}
}
-
-void doIterationWithDouble(
- size_t totalPoints,
- int compression,
+__kernel void _stepKernelWithDouble(
+ const unsigned int totalPoints,
__global uint256_t* chain,
__global uint256_t* xPtr,
__global uint256_t* yPtr,
__global uint256_t* incXPtr,
__global uint256_t* incYPtr,
__global unsigned int* targetList,
- size_t numTargets,
- ulong mask,
+ const ulong mask,
__global CLDeviceResult *results,
__global unsigned int *numResults)
{
- int gid = get_local_size(0) * get_group_id(0) + get_local_id(0);
+ int i = get_local_size(0) * get_group_id(0) + get_local_id(0);
int dim = get_global_size(0);
uint256_t incX = *incXPtr;
@@ -2070,48 +1766,35 @@ void doIterationWithDouble(
// Multiply together all (_Gx - x) and then invert
uint256_t inverse = { {0,0,0,0,0,0,0,1} };
- int i = gid;
int batchIdx = 0;
- for(; i < totalPoints; i += dim) {
- uint256_t x;
-
- unsigned int digest[5];
-
- x = xPtr[i];
+ unsigned int digest[5];
- // uncompressed
- if((compression == UNCOMPRESSED) || (compression == BOTH)) {
- uint256_t y = yPtr[i];
- hashPublicKey(x, y, digest);
+ for(; i < totalPoints; i += dim) {
- if(checkHash(digest, targetList, numTargets, mask)) {
- setResultFound(i, false, x, y, digest, results, numResults);
- }
+#if defined(COMPRESSION_UNCOMPRESSED) || defined(COMPRESSION_BOTH)
+ hashPublicKey(xPtr[i], yPtr[i], digest);
+ if(isInBloomFilter(digest, targetList, &mask)) {
+ setResultFound(i, false, xPtr[i], yPtr[i], digest, results, numResults);
}
-
- // compressed
- if((compression == COMPRESSED) || (compression == BOTH)) {
-
- hashPublicKeyCompressed(x, readLSW256k(yPtr, i), digest);
-
- if(checkHash(digest, targetList, numTargets, mask)) {
-
- uint256_t y = yPtr[i];
- setResultFound(i, true, x, y, digest, results, numResults);
- }
+#endif
+#if defined(COMPRESSION_COMPRESSED) || defined(COMPRESSION_BOTH)
+ hashPublicKeyCompressed(xPtr[i], yPtr[i].v[7], digest);
+ if(isInBloomFilter(digest, targetList, &mask)) {
+ setResultFound(i, true, xPtr[i], yPtr[i], digest, results, numResults);
}
+#endif
beginBatchAddWithDouble256k(incX, incY, xPtr, chain, i, batchIdx, &inverse);
batchIdx++;
}
- inverse = doBatchInverse256k(inverse);
+ doBatchInverse256k(inverse.v);
i -= dim;
+ uint256_t newX;
+ uint256_t newY;
for(; i >= 0; i -= dim) {
- uint256_t newX;
- uint256_t newY;
batchIdx--;
completeBatchAddWithDouble256k(incX, incY, xPtr, yPtr, i, batchIdx, chain, &inverse, &newX, &newY);
@@ -2119,40 +1802,3 @@ void doIterationWithDouble(
yPtr[i] = newY;
}
}
-
-/**
-* Performs a single iteration
-*/
-__kernel void keyFinderKernel(
- unsigned int totalPoints,
- int compression,
- __global uint256_t* chain,
- __global uint256_t* xPtr,
- __global uint256_t* yPtr,
- __global uint256_t* incXPtr,
- __global uint256_t* incYPtr,
- __global unsigned int* targetList,
- ulong numTargets,
- ulong mask,
- __global CLDeviceResult *results,
- __global unsigned int *numResults)
-{
- doIteration(totalPoints, compression, chain, xPtr, yPtr, incXPtr, incYPtr, targetList, numTargets, mask, results, numResults);
-}
-
-__kernel void keyFinderKernelWithDouble(
- unsigned int totalPoints,
- int compression,
- __global uint256_t* chain,
- __global uint256_t* xPtr,
- __global uint256_t* yPtr,
- __global uint256_t* incXPtr,
- __global uint256_t* incYPtr,
- __global unsigned int* targetList,
- ulong numTargets,
- ulong mask,
- __global CLDeviceResult *results,
- __global unsigned int *numResults)
-{
- doIterationWithDouble(totalPoints, compression, chain, xPtr, yPtr, incXPtr, incYPtr, targetList, numTargets, mask, results, numResults);
-}
diff --git a/CLKeySearchDevice/bloomfilter.cl b/CLKeySearchDevice/bloomfilter.cl
new file mode 100644
index 0000000..3e6265f
--- /dev/null
+++ b/CLKeySearchDevice/bloomfilter.cl
@@ -0,0 +1,19 @@
+#ifndef BLOOMFILTER_CL
+#define BLOOMFILTER_CL
+
+bool isInBloomFilter(const unsigned int hash[5], __global unsigned int *targetList, const ulong *mask)
+{
+ unsigned int h5 = hash[0] + hash[1] + hash[2] + hash[3] + hash[4];
+
+ return (false ==
+ (
+ (targetList[(((hash[0] << 6) | (h5 & 0x3f)) & *mask) / 32] & (0x01 << ((((hash[0] << 6) | (h5 & 0x3f)) & *mask) % 32))) == 0 ||
+ (targetList[(((hash[1] << 6) | ((h5 >> 6) & 0x3f)) & *mask) / 32] & (0x01 << ((((hash[1] << 6) | ((h5 >> 6) & 0x3f)) & *mask) % 32))) == 0 ||
+ (targetList[(((hash[2] << 6) | ((h5 >> 12) & 0x3f)) & *mask) / 32] & (0x01 << ((((hash[2] << 6) | ((h5 >> 12) & 0x3f)) & *mask) % 32))) == 0 ||
+ (targetList[(((hash[3] << 6) | ((h5 >> 18) & 0x3f)) & *mask) / 32] & (0x01 << ((((hash[3] << 6) | ((h5 >> 18) & 0x3f)) & *mask) % 32))) == 0 ||
+ (targetList[ (((hash[4] << 6) | ((h5 >> 24) & 0x3f)) & *mask) / 32] & (0x01 << ( (((hash[4] << 6) | ((h5 >> 24) & 0x3f)) & *mask) % 32))) == 0
+ )
+ );
+}
+
+#endif
diff --git a/CLKeySearchDevice/keysearch.cl b/CLKeySearchDevice/keysearch.cl
index 5da94c0..2728d3a 100644
--- a/CLKeySearchDevice/keysearch.cl
+++ b/CLKeySearchDevice/keysearch.cl
@@ -2,11 +2,6 @@
#define UNCOMPRESSED 1
#define BOTH 2
-unsigned int endian(unsigned int x)
-{
- return (x << 24) | ((x << 8) & 0x00ff0000) | ((x >> 8) & 0x0000ff00) | (x >> 24);
-}
-
typedef struct {
int idx;
bool compressed;
@@ -15,86 +10,46 @@ typedef struct {
unsigned int digest[5];
}CLDeviceResult;
-bool isInList(unsigned int hash[5], __global unsigned int *targetList, size_t numTargets)
-{
- bool found = false;
-
- for(size_t i = 0; i < numTargets; i++) {
- int equal = 0;
-
- for(int j = 0; j < 5; j++) {
- if(hash[j] == targetList[5 * i + j]) {
- equal++;
- }
- }
-
- if(equal == 5) {
- found = true;
- }
- }
-
- return found;
-}
-
-bool isInBloomFilter(unsigned int hash[5], __global unsigned int *targetList, ulong mask)
-{
- bool foundMatch = true;
-
- unsigned int h5 = 0;
-
- for(int i = 0; i < 5; i++) {
- h5 += hash[i];
- }
-
- uint64_t idx[5];
-
- idx[0] = ((hash[0] << 6) | (h5 & 0x3f)) & mask;
- idx[1] = ((hash[1] << 6) | ((h5 >> 6) & 0x3f)) & mask;
- idx[2] = ((hash[2] << 6) | ((h5 >> 12) & 0x3f)) & mask;
- idx[3] = ((hash[3] << 6) | ((h5 >> 18) & 0x3f)) & mask;
- idx[4] = ((hash[4] << 6) | ((h5 >> 24) & 0x3f)) & mask;
-
- for(int i = 0; i < 5; i++) {
- unsigned int j = idx[i];
- unsigned int f = targetList[j / 32];
-
- if((f & (0x01 << (j % 32))) == 0) {
- foundMatch = false;
- }
- }
-
- return foundMatch;
-}
-
-bool checkHash(unsigned int hash[5], __global unsigned int *targetList, size_t numTargets, ulong mask)
-{
- if(numTargets > 16) {
- return isInBloomFilter(hash, targetList, mask);
- } else {
- return isInList(hash, targetList, numTargets);
- }
-}
-
+void setResultFound(
+ const int idx,
+ const bool compressed,
+ const uint256_t x,
+ const uint256_t y,
+ const unsigned int digest[5],
+ __global CLDeviceResult* results,
+ __global unsigned int* numResults
+) {
+ CLDeviceResult r;
-void doRMD160FinalRound(const unsigned int hIn[5], unsigned int hOut[5])
-{
- const unsigned int iv[5] = {
- 0x67452301,
- 0xefcdab89,
- 0x98badcfe,
- 0x10325476,
- 0xc3d2e1f0
- };
+ r.idx = idx;
+ r.compressed = compressed;
- for(int i = 0; i < 5; i++) {
- hOut[i] = endian(hIn[i] + iv[(i + 1) % 5]);
- }
+ r.x[0] = x.v[0];
+ r.x[1] = x.v[1];
+ r.x[2] = x.v[2];
+ r.x[3] = x.v[3];
+ r.x[4] = x.v[4];
+ r.x[5] = x.v[5];
+ r.x[6] = x.v[6];
+ r.x[7] = x.v[7];
+
+ r.y[0] = y.v[0];
+ r.y[1] = y.v[1];
+ r.y[2] = y.v[2];
+ r.y[3] = y.v[3];
+ r.y[4] = y.v[4];
+ r.y[5] = y.v[5];
+ r.y[6] = y.v[6];
+ r.y[7] = y.v[7];
+
+ ripemd160FinalRound(digest, r.digest);
+
+ results[atomic_add(numResults, 1)] = r;
}
-
-__kernel void multiplyStepKernel(
- int totalPoints,
- int step,
+__kernel void _initKeysKernel(
+ const unsigned int totalPoints,
+ const unsigned int step,
__global uint256_t* privateKeys,
__global uint256_t* chain,
__global uint256_t* gxPtr,
@@ -104,51 +59,33 @@ __kernel void multiplyStepKernel(
{
uint256_t gx;
uint256_t gy;
- int gid = get_local_size(0) * get_group_id(0) + get_local_id(0);
+ int i = get_local_size(0) * get_group_id(0) + get_local_id(0);
int dim = get_global_size(0);
gx = gxPtr[step];
gy = gyPtr[step];
- // Multiply together all (_Gx - x) and then invert
uint256_t inverse = { {0,0,0,0,0,0,0,1} };
int batchIdx = 0;
- int i = gid;
- for(; i < totalPoints; i += dim) {
-
- unsigned int p;
- p = readWord256k(privateKeys, i, 7 - step / 32);
-
- unsigned int bit = p & (1 << (step % 32));
- uint256_t x = xPtr[i];
-
- if(bit != 0) {
- if(!isInfinity256k(x)) {
+ for(; i < totalPoints; i += dim) {
+ if(( (readWord256k(privateKeys, i, 7 - step / 32)) & (1 << (step % 32))) != 0) {
+ if(!isInfinity256k(xPtr[i].v)) {
beginBatchAddWithDouble256k(gx, gy, xPtr, chain, i, batchIdx, &inverse);
batchIdx++;
}
}
}
- //doBatchInverse(inverse);
- inverse = doBatchInverse256k(inverse);
+ doBatchInverse256k(inverse.v);
+ uint256_t newX;
+ uint256_t newY;
i -= dim;
for(; i >= 0; i -= dim) {
- uint256_t newX;
- uint256_t newY;
-
- unsigned int p;
- p = readWord256k(privateKeys, i, 7 - step / 32);
- unsigned int bit = p & (1 << (step % 32));
-
- uint256_t x = xPtr[i];
- bool infinity = isInfinity256k(x);
-
- if(bit != 0) {
- if(!infinity) {
+ if(((readWord256k(privateKeys, i, 7 - step / 32)) & (1 << (step % 32))) != 0) {
+ if(!isInfinity256k(xPtr[i].v)) {
batchIdx--;
completeBatchAddWithDouble256k(gx, gy, xPtr, yPtr, i, batchIdx, chain, &inverse, &newX, &newY);
} else {
@@ -162,75 +99,19 @@ __kernel void multiplyStepKernel(
}
}
-
-void hashPublicKey(uint256_t x, uint256_t y, unsigned int* digestOut)
-{
- unsigned int hash[8];
-
- sha256PublicKey(x.v, y.v, hash);
-
- // Swap to little-endian
- for(int i = 0; i < 8; i++) {
- hash[i] = endian(hash[i]);
- }
-
- ripemd160sha256NoFinal(hash, digestOut);
-}
-
-void hashPublicKeyCompressed(uint256_t x, unsigned int yParity, unsigned int* digestOut)
-{
- unsigned int hash[8];
-
- sha256PublicKeyCompressed(x.v, yParity, hash);
-
- // Swap to little-endian
- for(int i = 0; i < 8; i++) {
- hash[i] = endian(hash[i]);
- }
-
- ripemd160sha256NoFinal(hash, digestOut);
-
-}
-
-void atomicListAdd(__global CLDeviceResult *results, __global unsigned int *numResults, CLDeviceResult *r)
-{
- unsigned int count = atomic_add(numResults, 1);
-
- results[count] = *r;
-}
-
-void setResultFound(int idx, bool compressed, uint256_t x, uint256_t y, unsigned int digest[5], __global CLDeviceResult* results, __global unsigned int* numResults)
-{
- CLDeviceResult r;
-
- r.idx = idx;
- r.compressed = compressed;
-
- for(int i = 0; i < 8; i++) {
- r.x[i] = x.v[i];
- r.y[i] = y.v[i];
- }
-
- doRMD160FinalRound(digest, r.digest);
-
- atomicListAdd(results, numResults, &r);
-}
-
-void doIteration(
- size_t totalPoints,
- int compression,
+__kernel void _stepKernel(
+ const unsigned int totalPoints,
__global uint256_t* chain,
__global uint256_t* xPtr,
__global uint256_t* yPtr,
__global uint256_t* incXPtr,
__global uint256_t* incYPtr,
- __global unsigned int *targetList,
- size_t numTargets,
- ulong mask,
+ __global unsigned int* targetList,
+ const ulong mask,
__global CLDeviceResult *results,
__global unsigned int *numResults)
{
- int gid = get_local_size(0) * get_group_id(0) + get_local_id(0);
+ int i = get_local_size(0) * get_group_id(0) + get_local_id(0);
int dim = get_global_size(0);
uint256_t incX = *incXPtr;
@@ -238,48 +119,35 @@ void doIteration(
// Multiply together all (_Gx - x) and then invert
uint256_t inverse = { {0,0,0,0,0,0,0,1} };
- int i = gid;
int batchIdx = 0;
- for(; i < totalPoints; i += dim) {
- uint256_t x;
-
- unsigned int digest[5];
-
- x = xPtr[i];
-
- if((compression == UNCOMPRESSED) || (compression == BOTH)) {
- uint256_t y = yPtr[i];
-
- hashPublicKey(x, y, digest);
+ unsigned int digest[5];
- if(checkHash(digest, targetList, numTargets, mask)) {
- setResultFound(i, false, x, y, digest, results, numResults);
- }
+ for(; i < totalPoints; i += dim) {
+
+#if defined(COMPRESSION_UNCOMPRESSED) || defined(COMPRESSION_BOTH)
+ hashPublicKey(xPtr[i], yPtr[i], digest);
+ if(isInBloomFilter(digest, targetList, &mask)) {
+ setResultFound(i, false, xPtr[i], yPtr[i], digest, results, numResults);
}
-
- if((compression == COMPRESSED) || (compression == BOTH)) {
-
- hashPublicKeyCompressed(x, readLSW256k(yPtr, i), digest);
-
- if(checkHash(digest, targetList, numTargets, mask)) {
- uint256_t y = yPtr[i];
- setResultFound(i, true, x, y, digest, results, numResults);
- }
+#endif
+#if defined(COMPRESSION_COMPRESSED) || defined(COMPRESSION_BOTH)
+ hashPublicKeyCompressed(xPtr[i], yPtr[i].v[7], digest);
+ if(isInBloomFilter(digest, targetList, &mask)) {
+ setResultFound(i, true, xPtr[i], yPtr[i], digest, results, numResults);
}
-
- beginBatchAdd256k(incX, x, chain, i, batchIdx, &inverse);
+#endif
+ beginBatchAdd256k(incX, xPtr[i], chain, i, batchIdx, &inverse);
batchIdx++;
}
- inverse = doBatchInverse256k(inverse);
+ doBatchInverse256k(inverse.v);
i -= dim;
-
+ uint256_t newX;
+ uint256_t newY;
for(; i >= 0; i -= dim) {
- uint256_t newX;
- uint256_t newY;
batchIdx--;
completeBatchAdd256k(incX, incY, xPtr, yPtr, i, batchIdx, chain, &inverse, &newX, &newY);
@@ -287,114 +155,3 @@ void doIteration(
yPtr[i] = newY;
}
}
-
-
-void doIterationWithDouble(
- size_t totalPoints,
- int compression,
- __global uint256_t* chain,
- __global uint256_t* xPtr,
- __global uint256_t* yPtr,
- __global uint256_t* incXPtr,
- __global uint256_t* incYPtr,
- __global unsigned int* targetList,
- size_t numTargets,
- ulong mask,
- __global CLDeviceResult *results,
- __global unsigned int *numResults)
-{
- int gid = get_local_size(0) * get_group_id(0) + get_local_id(0);
- int dim = get_global_size(0);
-
- uint256_t incX = *incXPtr;
- uint256_t incY = *incYPtr;
-
- // Multiply together all (_Gx - x) and then invert
- uint256_t inverse = { {0,0,0,0,0,0,0,1} };
-
- int i = gid;
- int batchIdx = 0;
- for(; i < totalPoints; i += dim) {
- uint256_t x;
-
- unsigned int digest[5];
-
- x = xPtr[i];
-
- // uncompressed
- if((compression == UNCOMPRESSED) || (compression == BOTH)) {
- uint256_t y = yPtr[i];
- hashPublicKey(x, y, digest);
-
- if(checkHash(digest, targetList, numTargets, mask)) {
- setResultFound(i, false, x, y, digest, results, numResults);
- }
- }
-
- // compressed
- if((compression == COMPRESSED) || (compression == BOTH)) {
-
- hashPublicKeyCompressed(x, readLSW256k(yPtr, i), digest);
-
- if(checkHash(digest, targetList, numTargets, mask)) {
-
- uint256_t y = yPtr[i];
- setResultFound(i, true, x, y, digest, results, numResults);
- }
- }
-
- beginBatchAddWithDouble256k(incX, incY, xPtr, chain, i, batchIdx, &inverse);
- batchIdx++;
- }
-
- inverse = doBatchInverse256k(inverse);
-
- i -= dim;
-
- for(; i >= 0; i -= dim) {
- uint256_t newX;
- uint256_t newY;
- batchIdx--;
- completeBatchAddWithDouble256k(incX, incY, xPtr, yPtr, i, batchIdx, chain, &inverse, &newX, &newY);
-
- xPtr[i] = newX;
- yPtr[i] = newY;
- }
-}
-
-/**
-* Performs a single iteration
-*/
-__kernel void keyFinderKernel(
- unsigned int totalPoints,
- int compression,
- __global uint256_t* chain,
- __global uint256_t* xPtr,
- __global uint256_t* yPtr,
- __global uint256_t* incXPtr,
- __global uint256_t* incYPtr,
- __global unsigned int* targetList,
- ulong numTargets,
- ulong mask,
- __global CLDeviceResult *results,
- __global unsigned int *numResults)
-{
- doIteration(totalPoints, compression, chain, xPtr, yPtr, incXPtr, incYPtr, targetList, numTargets, mask, results, numResults);
-}
-
-__kernel void keyFinderKernelWithDouble(
- unsigned int totalPoints,
- int compression,
- __global uint256_t* chain,
- __global uint256_t* xPtr,
- __global uint256_t* yPtr,
- __global uint256_t* incXPtr,
- __global uint256_t* incYPtr,
- __global unsigned int* targetList,
- ulong numTargets,
- ulong mask,
- __global CLDeviceResult *results,
- __global unsigned int *numResults)
-{
- doIterationWithDouble(totalPoints, compression, chain, xPtr, yPtr, incXPtr, incYPtr, targetList, numTargets, mask, results, numResults);
-}
diff --git a/CLUnitTests/CLUnitTests.vcxproj b/CLUnitTests/CLUnitTests.vcxproj
index 6a8e415..6056285 100644
--- a/CLUnitTests/CLUnitTests.vcxproj
+++ b/CLUnitTests/CLUnitTests.vcxproj
@@ -28,26 +28,26 @@
Application
true
- v141
+ ClangCl
MultiByte
Application
false
- v141
+ ClangCl
true
MultiByte
Application
true
- v142
+ ClangCl
MultiByte
Application
false
- v142
+ ClangCL
true
MultiByte
@@ -107,7 +107,7 @@ $(SolutionDir)\tools\embedcl.exe test.cl test.cpp _secp256k1_test_cl
- Level3
+ EnableAllWarnings
Disabled
true
true
diff --git a/CLUnitTests/main.cpp b/CLUnitTests/main.cpp
index f221f2f..4320254 100644
--- a/CLUnitTests/main.cpp
+++ b/CLUnitTests/main.cpp
@@ -67,7 +67,7 @@ int main(int argc, char **argv)
try {
devices = cl::getDevices();
}catch(cl::CLException ex) {
- std::cout << "Error: " << ex.msg << std::endl;
+ std::cout << "Error " << ex.msg << ": " << ex.description << std::endl;
return 1;
}
@@ -86,7 +86,7 @@ int main(int argc, char **argv)
numErrors += runTest(devices[i].id);
}
catch(cl::CLException ex) {
- std::cout << "Error " << ex.msg << std::endl;
+ std::cout << "Error " << ex.msg << ": " << ex.description << std::endl;
}
}
diff --git a/CLUnitTests/secp256k1test.cl b/CLUnitTests/secp256k1test.cl
index d3119a1..de7e0ba 100644
--- a/CLUnitTests/secp256k1test.cl
+++ b/CLUnitTests/secp256k1test.cl
@@ -8,6 +8,17 @@ typedef struct {
}CLErrorInfo;
+bool equal(const unsigned int a[8], const unsigned int b[8])
+{
+ for(int i = 0; i < 8; i++) {
+ if(a[i] != b[i]) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
bool addTest()
{
unsigned int x[8] = { 0xa4aea9b8, 0x6fe248f5, 0x1fc74965, 0xe9493264, 0x4e2dff0c, 0x009f7c9c, 0x832fa59b, 0x3361f837 };
diff --git a/CmdParse/CmdParse.cpp b/CmdParse/CmdParse.cpp
index adc1c6c..7855bd2 100644
--- a/CmdParse/CmdParse.cpp
+++ b/CmdParse/CmdParse.cpp
@@ -80,4 +80,4 @@ std::vector CmdParse::getArgs()
std::vector CmdParse::getOperands()
{
return _operands;
-}
\ No newline at end of file
+}
diff --git a/CmdParse/CmdParse.h b/CmdParse/CmdParse.h
index 3135fbf..ca85e03 100644
--- a/CmdParse/CmdParse.h
+++ b/CmdParse/CmdParse.h
@@ -1,5 +1,5 @@
-#ifndef _CMD_PARSE
-#define _CMD_PARSE
+#ifndef CMD_PARSE_H
+#define CMD_PARSE_H
#include
#include
@@ -53,4 +53,4 @@ class CmdParse {
std::vector getOperands();
};
-#endif
\ No newline at end of file
+#endif
diff --git a/CmdParse/CmdParse.vcxproj b/CmdParse/CmdParse.vcxproj
index e42fc4c..116946a 100644
--- a/CmdParse/CmdParse.vcxproj
+++ b/CmdParse/CmdParse.vcxproj
@@ -5,6 +5,14 @@
Debug
Win32
+
+ Performance Release
+ Win32
+
+
+ Performance Release
+ x64
+
Release
Win32
@@ -34,26 +42,40 @@
StaticLibrary
true
- v141
+ ClangCl
+ Unicode
+
+
+ StaticLibrary
+ false
+ ClangCl
Unicode
StaticLibrary
false
- v141
+ ClangCl
true
Unicode
StaticLibrary
true
- v142
+ ClangCl
+ Unicode
+
+
+ StaticLibrary
+ false
+ ClangCl
Unicode
+ true
+ x64
StaticLibrary
false
- v142
+ ClangCL
true
Unicode
@@ -66,6 +88,10 @@
+
+
+
+
@@ -74,6 +100,10 @@
+
+
+
+
@@ -92,10 +122,22 @@
Windows
+
+
+
+
+ Level3
+ Disabled
+ _CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)
+
+
+ Windows
+
+
NotUsing
- Level3
+ EnableAllWarnings
Disabled
_CRT_SECURE_NO_WARNINGS;_DEBUG;_LIB;%(PreprocessorDefinitions)
@@ -103,6 +145,24 @@
Windows
+
+
+ NotUsing
+ Level3
+ _CRT_SECURE_NO_WARNINGS;NDEBUG;_LIB;%(PreprocessorDefinitions)
+ None
+ MaxSpeed
+ AnySuitable
+ true
+ Speed
+ true
+ true
+ true
+
+
+ Windows
+
+
Level3
diff --git a/CryptoUtil/CryptoUtil.h b/CryptoUtil/CryptoUtil.h
index fdd2b5c..aab42b6 100644
--- a/CryptoUtil/CryptoUtil.h
+++ b/CryptoUtil/CryptoUtil.h
@@ -10,7 +10,7 @@ namespace crypto {
public:
Rng();
- void get(unsigned char *buf, int len);
+ void get(unsigned char *buf, size_t len);
};
@@ -20,6 +20,6 @@ namespace crypto {
void sha256(unsigned int *msg, unsigned int *digest);
unsigned int checksum(const unsigned int *hash);
-};
+}
-#endif
\ No newline at end of file
+#endif
diff --git a/CryptoUtil/CryptoUtil.vcxproj b/CryptoUtil/CryptoUtil.vcxproj
index 8b0ed8b..53ab0fc 100644
--- a/CryptoUtil/CryptoUtil.vcxproj
+++ b/CryptoUtil/CryptoUtil.vcxproj
@@ -5,6 +5,14 @@
Debug
Win32
+
+ Performance Release
+ Win32
+
+
+ Performance Release
+ x64
+
Release
Win32
@@ -31,32 +39,46 @@
{CA46856A-1D1E-4F6F-A69C-6707D540BF36}
Win32Proj
CryptoUtil
- 10.0
+ 10.0.19041.0
StaticLibrary
true
- v141
+ ClangCl
+ Unicode
+
+
+ StaticLibrary
+ ClangCl
Unicode
StaticLibrary
false
- v141
+ ClangCl
true
Unicode
StaticLibrary
- true
- v142
+ false
+ ClangCl
Unicode
+ true
+
+
+ StaticLibrary
+ false
+ ClangCl
+ Unicode
+ true
+ x64
StaticLibrary
false
- v142
+ ClangCL
true
Unicode
@@ -69,6 +91,10 @@
+
+
+
+
@@ -77,6 +103,10 @@
+
+
+
+
@@ -95,10 +125,22 @@
Windows
+
+
+
+
+ Level3
+ Disabled
+ _CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)
+
+
+ Windows
+
+
NotUsing
- Level3
+ EnableAllWarnings
Disabled
_CRT_SECURE_NO_WARNINGS;_DEBUG;_LIB;%(PreprocessorDefinitions)
@@ -106,6 +148,24 @@
Windows
+
+
+ NotUsing
+ Level3
+ MaxSpeed
+ _CRT_SECURE_NO_WARNINGS;NDEBUG;_LIB;%(PreprocessorDefinitions)
+ None
+ AnySuitable
+ true
+ Speed
+ true
+ true
+ true
+
+
+ Windows
+
+
Level3
diff --git a/CryptoUtil/Rng.cpp b/CryptoUtil/Rng.cpp
index ada92b1..c07b4ce 100644
--- a/CryptoUtil/Rng.cpp
+++ b/CryptoUtil/Rng.cpp
@@ -46,7 +46,7 @@ void crypto::Rng::reseed()
secureRandom((unsigned char *)_state, 32);
}
-void crypto::Rng::get(unsigned char *buf, int len)
+void crypto::Rng::get(unsigned char *buf, size_t len)
{
int i = 0;
while(len > 0) {
@@ -70,4 +70,4 @@ void crypto::Rng::get(unsigned char *buf, int len)
len -= len;
}
}
-}
\ No newline at end of file
+}
diff --git a/CryptoUtil/hash.cpp b/CryptoUtil/hash.cpp
index 138a562..5d534aa 100644
--- a/CryptoUtil/hash.cpp
+++ b/CryptoUtil/hash.cpp
@@ -2,11 +2,6 @@
#include
#include
-static unsigned int endian(unsigned int x)
-{
- return (x << 24) | ((x << 8) & 0x00ff0000) | ((x >> 8) & 0x0000ff00) | (x >> 24);
-}
-
unsigned int crypto::checksum(const unsigned int *hash)
{
unsigned int msg[16] = { 0 };
@@ -30,6 +25,8 @@ unsigned int crypto::checksum(const unsigned int *hash)
// Prepare to make a hash of the digest
memset(msg, 0, 16 * sizeof(unsigned int));
+
+ #pragma clang loop unroll(full)
for(int i = 0; i < 8; i++) {
msg[i] = digest[i];
}
diff --git a/CryptoUtil/ripemd160.cpp b/CryptoUtil/ripemd160.cpp
index f442f15..790a804 100644
--- a/CryptoUtil/ripemd160.cpp
+++ b/CryptoUtil/ripemd160.cpp
@@ -25,7 +25,7 @@ static unsigned int endian(unsigned int x)
return (x << 24) | ((x << 8) & 0x00ff0000) | ((x >> 8) & 0x0000ff00) | (x >> 24);
}
-static unsigned int rotl(unsigned int x, int n)
+static unsigned int rotl(unsigned int x, unsigned int n)
{
return (x << n) | (x >> (32 - n));
}
@@ -325,4 +325,4 @@ void crypto::ripemd160(unsigned int *x, unsigned int *digest)
digest[2] = endian(_IV[3] + e1 + a2);
digest[3] = endian(_IV[4] + a1 + b2);
digest[4] = endian(_IV[0] + b1 + c2);
-}
\ No newline at end of file
+}
diff --git a/CryptoUtil/sha256.cpp b/CryptoUtil/sha256.cpp
index 1f8d853..6247272 100644
--- a/CryptoUtil/sha256.cpp
+++ b/CryptoUtil/sha256.cpp
@@ -50,6 +50,7 @@ static void round(unsigned int a, unsigned int b, unsigned int c, unsigned int &
void crypto::sha256Init(unsigned int *digest)
{
+ #pragma clang loop unroll(full)
for(int i = 0; i < 8; i++) {
digest[i] = _IV[i];
}
@@ -70,11 +71,14 @@ void crypto::sha256(unsigned int *msg, unsigned int *digest)
h = digest[7];
unsigned int w[80] = { 0 };
+ #pragma clang loop unroll(full)
for(int i = 0; i < 16; i++) {
w[i] = msg[i];
}
// Expand 16 words to 64 words
+
+ #pragma clang loop unroll(full)
for(int i = 16; i < 64; i++) {
unsigned int x = w[i - 15];
unsigned int y = w[i - 2];
@@ -84,6 +88,7 @@ void crypto::sha256(unsigned int *msg, unsigned int *digest)
w[i] = w[i - 16] + s0 + w[i - 7] + s1;
}
+ #pragma clang loop unroll(full)
for(int i = 0; i < 64; i += 8) {
round(a, b, c, d, e, f, g, h, w[i], _K[i]);
round(h, a, b, c, d, e, f, g, w[i + 1], _K[i + 1]);
@@ -103,4 +108,4 @@ void crypto::sha256(unsigned int *msg, unsigned int *digest)
digest[5] += f;
digest[6] += g;
digest[7] += h;
-}
\ No newline at end of file
+}
diff --git a/CudaKeySearchDevice/CudaAtomicList.cu b/CudaKeySearchDevice/CudaAtomicList.cu
deleted file mode 100644
index dcf1096..0000000
--- a/CudaKeySearchDevice/CudaAtomicList.cu
+++ /dev/null
@@ -1,116 +0,0 @@
-#include "CudaAtomicList.h"
-#include "CudaAtomicList.cuh"
-
-#include
-
-#include
-#include
-
-static __constant__ void *_LIST_BUF[1];
-static __constant__ unsigned int *_LIST_SIZE[1];
-
-
-__device__ void atomicListAdd(void *info, unsigned int size)
-{
- unsigned int count = atomicAdd(_LIST_SIZE[0], 1);
-
- unsigned char *ptr = (unsigned char *)(_LIST_BUF[0]) + count * size;
-
- memcpy(ptr, info, size);
-}
-
-static cudaError_t setListPtr(void *ptr, unsigned int *numResults)
-{
- cudaError_t err = cudaMemcpyToSymbol(_LIST_BUF, &ptr, sizeof(void *));
-
- if(err) {
- return err;
- }
-
- err = cudaMemcpyToSymbol(_LIST_SIZE, &numResults, sizeof(unsigned int *));
-
- return err;
-}
-
-
-cudaError_t CudaAtomicList::init(unsigned int itemSize, unsigned int maxItems)
-{
- _itemSize = itemSize;
-
- // The number of results found in the most recent kernel run
- _countHostPtr = NULL;
- cudaError_t err = cudaHostAlloc(&_countHostPtr, sizeof(unsigned int), cudaHostAllocMapped);
- if(err) {
- goto end;
- }
-
- // Number of items in the list
- _countDevPtr = NULL;
- err = cudaHostGetDevicePointer(&_countDevPtr, _countHostPtr, 0);
- if(err) {
- goto end;
- }
- *_countHostPtr = 0;
-
- // Storage for results data
- _hostPtr = NULL;
- err = cudaHostAlloc(&_hostPtr, itemSize * maxItems, cudaHostAllocMapped);
- if(err) {
- goto end;
- }
-
- // Storage for results data (device to host pointer)
- _devPtr = NULL;
- err = cudaHostGetDevicePointer(&_devPtr, _hostPtr, 0);
-
- if(err) {
- goto end;
- }
-
- err = setListPtr(_devPtr, _countDevPtr);
-
-end:
- if(err) {
- cudaFreeHost(_countHostPtr);
-
- cudaFree(_countDevPtr);
-
- cudaFreeHost(_hostPtr);
-
- cudaFree(_devPtr);
- }
-
- return err;
-}
-
-unsigned int CudaAtomicList::size()
-{
- return *_countHostPtr;
-}
-
-void CudaAtomicList::clear()
-{
- *_countHostPtr = 0;
-}
-
-unsigned int CudaAtomicList::read(void *ptr, unsigned int count)
-{
- if(count >= *_countHostPtr) {
- count = *_countHostPtr;
- }
-
- memcpy(ptr, _hostPtr, count * _itemSize);
-
- return count;
-}
-
-void CudaAtomicList::cleanup()
-{
- cudaFreeHost(_countHostPtr);
-
- cudaFree(_countDevPtr);
-
- cudaFreeHost(_hostPtr);
-
- cudaFree(_devPtr);
-}
\ No newline at end of file
diff --git a/CudaKeySearchDevice/CudaAtomicList.cuh b/CudaKeySearchDevice/CudaAtomicList.cuh
deleted file mode 100644
index 70dacb2..0000000
--- a/CudaKeySearchDevice/CudaAtomicList.cuh
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef _ATOMIC_LIST_CUH
-#define _ATOMIC_LIST_CUH
-
-#include
-
-__device__ void atomicListAdd(void *info, unsigned int size);
-
-#endif
\ No newline at end of file
diff --git a/CudaKeySearchDevice/CudaAtomicList.h b/CudaKeySearchDevice/CudaAtomicList.h
deleted file mode 100644
index 8bd9eeb..0000000
--- a/CudaKeySearchDevice/CudaAtomicList.h
+++ /dev/null
@@ -1,54 +0,0 @@
-#ifndef _ATOMIC_LIST_HOST_H
-#define _ATOMIC_LIST_HOST_H
-
-#include
-
-/**
- A list that multiple device threads can append items to. Items can be
- read and removed by the host
- */
-class CudaAtomicList {
-
-private:
- void *_devPtr;
-
- void *_hostPtr;
-
- unsigned int *_countHostPtr;
-
- unsigned int *_countDevPtr;
-
- unsigned int _maxSize;
-
- unsigned int _itemSize;
-
-public:
-
- CudaAtomicList()
- {
- _devPtr = NULL;
- _hostPtr = NULL;
- _countHostPtr = NULL;
- _countDevPtr = NULL;
- _maxSize = 0;
- _itemSize = 0;
- }
-
- ~CudaAtomicList()
- {
- cleanup();
- }
-
- cudaError_t init(unsigned int itemSize, unsigned int maxItems);
-
- unsigned int read(void *dest, unsigned int count);
-
- unsigned int size();
-
- void clear();
-
- void cleanup();
-
-};
-
-#endif
\ No newline at end of file
diff --git a/CudaKeySearchDevice/CudaDeviceKeys.cu b/CudaKeySearchDevice/CudaDeviceKeys.cu
deleted file mode 100644
index d98dbaa..0000000
--- a/CudaKeySearchDevice/CudaDeviceKeys.cu
+++ /dev/null
@@ -1,397 +0,0 @@
-#include
-#include
-#include
-
-#include "CudaDeviceKeys.h"
-#include "CudaDeviceKeys.cuh"
-#include "secp256k1.cuh"
-
-
-__constant__ unsigned int *_xPtr[1];
-
-__constant__ unsigned int *_yPtr[1];
-
-
-__device__ unsigned int *ec::getXPtr()
-{
- return _xPtr[0];
-}
-
-__device__ unsigned int *ec::getYPtr()
-{
- return _yPtr[0];
-}
-
-__global__ void multiplyStepKernel(const unsigned int *privateKeys, int pointsPerThread, int step, unsigned int *chain, const unsigned int *gxPtr, const unsigned int *gyPtr);
-
-
-int CudaDeviceKeys::getIndex(int block, int thread, int idx)
-{
- // Total number of threads
- int totalThreads = _blocks * _threads;
-
- int base = idx * totalThreads;
-
- // Global ID of the current thread
- int threadId = block * _threads + thread;
-
- return base + threadId;
-}
-
-void CudaDeviceKeys::splatBigInt(unsigned int *dest, int block, int thread, int idx, const secp256k1::uint256 &i)
-{
- unsigned int value[8] = { 0 };
-
- i.exportWords(value, 8, secp256k1::uint256::BigEndian);
-
- int totalThreads = _blocks * _threads;
- int threadId = block * _threads + thread;
-
- int base = idx * _blocks * _threads * 8;
-
- int index = base + threadId;
-
- for(int k = 0; k < 8; k++) {
- dest[index] = value[k];
- index += totalThreads;
- }
-}
-
-secp256k1::uint256 CudaDeviceKeys::readBigInt(unsigned int *src, int block, int thread, int idx)
-{
- unsigned int value[8] = { 0 };
-
- int totalThreads = _blocks * _threads;
- int threadId = block * _threads + thread;
-
- int base = idx * _blocks * _threads * 8;
-
- int index = base + threadId;
-
- for(int k = 0; k < 8; k++) {
- value[k] = src[index];
- index += totalThreads;
- }
-
- secp256k1::uint256 v(value, secp256k1::uint256::BigEndian);
-
- return v;
-}
-
-/**
-* Allocates device memory for storing the multiplication chain used in
-the batch inversion operation
-*/
-cudaError_t CudaDeviceKeys::allocateChainBuf(unsigned int count)
-{
- cudaError_t err = cudaMalloc(&_devChain, count * sizeof(unsigned int) * 8);
-
- if(err) {
- return err;
- }
-
- return err;
-}
-
-cudaError_t CudaDeviceKeys::initializeBasePoints()
-{
- // generate a table of points G, 2G, 4G, 8G...(2^255)G
- std::vector table;
-
- table.push_back(secp256k1::G());
- for(int i = 1; i < 256; i++) {
-
- secp256k1::ecpoint p = doublePoint(table[i - 1]);
- if(!pointExists(p)) {
- throw std::string("Point does not exist!");
- }
- table.push_back(p);
- }
-
- unsigned int count = 256;
-
- cudaError_t err = cudaMalloc(&_devBasePointX, sizeof(unsigned int) * count * 8);
-
- if(err) {
- return err;
- }
-
- err = cudaMalloc(&_devBasePointY, sizeof(unsigned int) * count * 8);
- if(err) {
- return err;
- }
-
- unsigned int *tmpX = new unsigned int[count * 8];
- unsigned int *tmpY = new unsigned int[count * 8];
-
- for(int i = 0; i < 256; i++) {
- unsigned int bufX[8];
- unsigned int bufY[8];
- table[i].x.exportWords(bufX, 8, secp256k1::uint256::BigEndian);
- table[i].y.exportWords(bufY, 8, secp256k1::uint256::BigEndian);
-
- for(int j = 0; j < 8; j++) {
- tmpX[i * 8 + j] = bufX[j];
- tmpY[i * 8 + j] = bufY[j];
- }
- }
-
- err = cudaMemcpy(_devBasePointX, tmpX, count * 8 * sizeof(unsigned int), cudaMemcpyHostToDevice);
-
- delete[] tmpX;
-
- if(err) {
- delete[] tmpY;
- return err;
- }
-
- err = cudaMemcpy(_devBasePointY, tmpY, count * 8 * sizeof(unsigned int), cudaMemcpyHostToDevice);
-
- delete[] tmpY;
-
- return err;
-}
-
-cudaError_t CudaDeviceKeys::initializePublicKeys(size_t count)
-{
-
- // Allocate X array
- cudaError_t err = cudaMalloc(&_devX, sizeof(unsigned int) * count * 8);
- if(err) {
- return err;
- }
-
- // Clear X array
- err = cudaMemset(_devX, -1, sizeof(unsigned int) * count * 8);
- if(err) {
- return err;
- }
-
- // Allocate Y array
- err = cudaMalloc(&_devY, sizeof(unsigned int) * count * 8);
- if(err) {
- return err;
- }
-
- // Clear Y array
- err = cudaMemset(_devY, -1, sizeof(unsigned int) * count * 8);
- if(err) {
- return err;
- }
-
- err = cudaMemcpyToSymbol(_xPtr, &_devX, sizeof(unsigned int *));
- if(err) {
- return err;
- }
-
- err = cudaMemcpyToSymbol(_yPtr, &_devY, sizeof(unsigned int *));
-
- return err;
-}
-
-cudaError_t CudaDeviceKeys::init(int blocks, int threads, int pointsPerThread, const std::vector &privateKeys)
-{
- _blocks = blocks;
- _threads = threads;
- _pointsPerThread = pointsPerThread;
-
- size_t count = privateKeys.size();
-
- // Allocate space for public keys on device
- cudaError_t err = initializePublicKeys(count);
-
- if(err) {
- return err;
- }
-
- err = initializeBasePoints();
- if(err) {
- return err;
- }
-
- // Allocate private keys on device
- err = cudaMalloc(&_devPrivate, sizeof(unsigned int) * count * 8);
- if(err) {
- return err;
- }
-
-
- // Clear private keys
- err = cudaMemset(_devPrivate, 0, sizeof(unsigned int) * count * 8);
- if(err) {
- return err;
- }
-
- err = allocateChainBuf(_threads * _blocks * _pointsPerThread);
- if(err) {
- return err;
- }
-
- // Copy private keys to system memory buffer
- unsigned int *tmp = new unsigned int[count * 8];
-
- for(int block = 0; block < _blocks; block++) {
- for(int thread = 0; thread < _threads; thread++) {
- for(int idx = 0; idx < _pointsPerThread; idx++) {
-
- int index = getIndex(block, thread, idx);
-
- splatBigInt(tmp, block, thread, idx, privateKeys[index]);
- }
- }
- }
-
- // Copy private keys to device memory
- err = cudaMemcpy(_devPrivate, tmp, count * sizeof(unsigned int) * 8, cudaMemcpyHostToDevice);
-
- delete[] tmp;
-
- if(err) {
- return err;
- }
-
- return cudaSuccess;
-}
-
-void CudaDeviceKeys::clearPublicKeys()
-{
- cudaFree(_devX);
- cudaFree(_devY);
-
- _devX = NULL;
- _devY = NULL;
-}
-
-void CudaDeviceKeys::clearPrivateKeys()
-{
- cudaFree(_devBasePointX);
- cudaFree(_devBasePointY);
- cudaFree(_devPrivate);
- cudaFree(_devChain);
-
- _devChain = NULL;
- _devBasePointX = NULL;
- _devBasePointY = NULL;
- _devPrivate = NULL;
-}
-
-cudaError_t CudaDeviceKeys::doStep()
-{
- multiplyStepKernel <<<_blocks, _threads>>>(_devPrivate, _pointsPerThread, _step, _devChain, _devBasePointX, _devBasePointY);
-
- // Wait for kernel to complete
- cudaError_t err = cudaDeviceSynchronize();
- fflush(stdout);
- _step++;
- return err;
-}
-
-__global__ void multiplyStepKernel(const unsigned int *privateKeys, int pointsPerThread, int step, unsigned int *chain, const unsigned int *gxPtr, const unsigned int *gyPtr)
-{
- unsigned int *xPtr = ec::getXPtr();
-
- unsigned int *yPtr = ec::getYPtr();
-
- unsigned int gx[8];
- unsigned int gy[8];
-
- for(int i = 0; i < 8; i++) {
- gx[i] = gxPtr[step * 8 + i];
- gy[i] = gyPtr[step * 8 + i];
- }
-
- // Multiply together all (_Gx - x) and then invert
- unsigned int inverse[8] = { 0,0,0,0,0,0,0,1 };
-
- int batchIdx = 0;
- for(int i = 0; i < pointsPerThread; i++) {
-
- unsigned int p[8];
- readInt(privateKeys, i, p);
- unsigned int bit = p[7 - step / 32] & 1 << ((step % 32));
-
- unsigned int x[8];
- readInt(xPtr, i, x);
-
- if(bit != 0) {
- if(!isInfinity(x)) {
- beginBatchAddWithDouble(gx, gy, xPtr, chain, i, batchIdx, inverse);
- batchIdx++;
- }
- }
- }
-
- doBatchInverse(inverse);
-
- for(int i = pointsPerThread - 1; i >= 0; i--) {
-
- unsigned int newX[8];
- unsigned int newY[8];
-
- unsigned int p[8];
- readInt(privateKeys, i, p);
- unsigned int bit = p[7 - step / 32] & 1 << ((step % 32));
-
- unsigned int x[8];
- readInt(xPtr, i, x);
-
- bool infinity = isInfinity(x);
-
- if(bit != 0) {
- if(!infinity) {
- batchIdx--;
- completeBatchAddWithDouble(gx, gy, xPtr, yPtr, i, batchIdx, chain, inverse, newX, newY);
- } else {
- copyBigInt(gx, newX);
- copyBigInt(gy, newY);
- }
-
- writeInt(xPtr, i, newX);
- writeInt(yPtr, i, newY);
- }
- }
-}
-
-bool CudaDeviceKeys::selfTest(const std::vector &privateKeys)
-{
- unsigned int numPoints = _threads * _blocks * _pointsPerThread;
-
- unsigned int *xBuf = new unsigned int[numPoints * 8];
- unsigned int *yBuf = new unsigned int[numPoints * 8];
-
- cudaError_t err = cudaMemcpy(xBuf, _devX, sizeof(unsigned int) * 8 * numPoints, cudaMemcpyDeviceToHost);
-
- err = cudaMemcpy(yBuf, _devY, sizeof(unsigned int) * 8 * numPoints, cudaMemcpyDeviceToHost);
-
-
- for(int block = 0; block < _blocks; block++) {
- for(int thread = 0; thread < _threads; thread++) {
- for(int idx = 0; idx < _pointsPerThread; idx++) {
-
- int index = getIndex(block, thread, idx);
-
- secp256k1::uint256 privateKey = privateKeys[index];
-
- secp256k1::uint256 x = readBigInt(xBuf, block, thread, idx);
- secp256k1::uint256 y = readBigInt(yBuf, block, thread, idx);
-
- secp256k1::ecpoint p1(x, y);
- secp256k1::ecpoint p2 = secp256k1::multiplyPoint(privateKey, secp256k1::G());
-
- if(!secp256k1::pointExists(p1)) {
- throw std::string("Validation failed: invalid point");
- }
-
- if(!secp256k1::pointExists(p2)) {
- throw std::string("Validation failed: invalid point");
- }
-
- if(!(p1 == p2)) {
- throw std::string("Validation failed: points do not match");
- }
- }
- }
- }
-
- return true;
-}
\ No newline at end of file
diff --git a/CudaKeySearchDevice/CudaDeviceKeys.cuh b/CudaKeySearchDevice/CudaDeviceKeys.cuh
deleted file mode 100644
index 3758b1c..0000000
--- a/CudaKeySearchDevice/CudaDeviceKeys.cuh
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef _EC_CUH
-#define _EC_CUH
-
-#include
-
-namespace ec {
- __device__ unsigned int *getXPtr();
-
- __device__ unsigned int *getYPtr();
-}
-
-#endif
\ No newline at end of file
diff --git a/CudaKeySearchDevice/CudaDeviceKeys.h b/CudaKeySearchDevice/CudaDeviceKeys.h
deleted file mode 100644
index f2407f1..0000000
--- a/CudaKeySearchDevice/CudaDeviceKeys.h
+++ /dev/null
@@ -1,81 +0,0 @@
-#ifndef _EC_H
-#define _EC_H
-
-#include
-#include
-
-#include
-#include "secp256k1.h"
-
-
-class CudaDeviceKeys {
-
-private:
- int _blocks;
-
- int _threads;
-
- int _pointsPerThread;
-
- unsigned int _numKeys;
-
- unsigned int *_devX;
-
- unsigned int *_devY;
-
- unsigned int *_devPrivate;
-
- unsigned int *_devChain;
-
- unsigned int *_devBasePointX;
-
- unsigned int *_devBasePointY;
-
- int _step;
-
- int getIndex(int block, int thread, int idx);
-
- void splatBigInt(unsigned int *dest, int block, int thread, int idx, const secp256k1::uint256 &i);
-
- secp256k1::uint256 readBigInt(unsigned int *src, int block, int thread, int idx);
-
- cudaError_t allocateChainBuf(unsigned int count);
-
- cudaError_t initializePublicKeys(size_t count);
-
- cudaError_t initializeBasePoints();
-
-
-public:
-
- CudaDeviceKeys()
- {
- _numKeys = 0;
- _devX = NULL;
- _devY = NULL;
- _devPrivate = NULL;
- _devChain = NULL;
- _devBasePointX = NULL;
- _devBasePointY = NULL;
- _step = 0;
- }
-
- ~CudaDeviceKeys()
- {
- clearPublicKeys();
- clearPrivateKeys();
- }
-
- cudaError_t init(int blocks, int threads, int pointsPerThread, const std::vector &privateKeys);
-
- bool selfTest(const std::vector &privateKeys);
-
- cudaError_t doStep();
-
- void clearPrivateKeys();
-
- void clearPublicKeys();
-
-};
-
-#endif
\ No newline at end of file
diff --git a/CudaKeySearchDevice/CudaHashLookup.cu b/CudaKeySearchDevice/CudaHashLookup.cu
deleted file mode 100644
index ce99ef2..0000000
--- a/CudaKeySearchDevice/CudaHashLookup.cu
+++ /dev/null
@@ -1,306 +0,0 @@
-#include
-#include
-#include
-#include
-
-#include "KeySearchDevice.h"
-
-#include "CudaHashLookup.h"
-
-#include "CudaHashLookup.cuh"
-
-#include "Logger.h"
-
-#include "util.h"
-
-#define MAX_TARGETS_CONSTANT_MEM 16
-
-__constant__ unsigned int _TARGET_HASH[MAX_TARGETS_CONSTANT_MEM][5];
-__constant__ unsigned int _NUM_TARGET_HASHES[1];
-__constant__ unsigned int *_BLOOM_FILTER[1];
-__constant__ unsigned int _BLOOM_FILTER_MASK[1];
-__constant__ unsigned long long _BLOOM_FILTER_MASK64[1];
-
-__constant__ unsigned int _USE_BLOOM_FILTER[1];
-
-
-static unsigned int swp(unsigned int x)
-{
- return (x << 24) | ((x << 8) & 0x00ff0000) | ((x >> 8) & 0x0000ff00) | (x >> 24);
-}
-
-static void undoRMD160FinalRound(const unsigned int hIn[5], unsigned int hOut[5])
-{
- unsigned int iv[5] = {
- 0x67452301,
- 0xefcdab89,
- 0x98badcfe,
- 0x10325476,
- 0xc3d2e1f0
- };
-
- for(int i = 0; i < 5; i++) {
- hOut[i] = swp(hIn[i]) - iv[(i + 1) % 5];
- }
-}
-
-/**
-Copies the target hashes to constant memory
-*/
-cudaError_t CudaHashLookup::setTargetConstantMemory(const std::vector &targets)
-{
- size_t count = targets.size();
-
- for(size_t i = 0; i < count; i++) {
- unsigned int h[5];
-
- undoRMD160FinalRound(targets[i].h, h);
-
- cudaError_t err = cudaMemcpyToSymbol(_TARGET_HASH, h, sizeof(unsigned int) * 5, i * sizeof(unsigned int) * 5);
-
- if(err) {
- return err;
- }
- }
-
- cudaError_t err = cudaMemcpyToSymbol(_NUM_TARGET_HASHES, &count, sizeof(unsigned int));
- if(err) {
- return err;
- }
-
- unsigned int useBloomFilter = 0;
-
- err = cudaMemcpyToSymbol(_USE_BLOOM_FILTER, &useBloomFilter, sizeof(bool));
- if(err) {
- return err;
- }
-
- return cudaSuccess;
-}
-
-/**
-Returns the optimal bloom filter size in bits given the probability of false-positives and the
-number of hash functions
-*/
-unsigned int CudaHashLookup::getOptimalBloomFilterBits(double p, size_t n)
-{
- double m = 3.6 * ceil((n * log(p)) / log(1 / pow(2, log(2))));
-
- return (unsigned int)ceil(log(m) / log(2));
-}
-
-void CudaHashLookup::initializeBloomFilter(const std::vector &targets, unsigned int *filter, unsigned int mask)
-{
- // Use the low 16 bits of each word in the hash as the index into the bloom filter
- for(unsigned int i = 0; i < targets.size(); i++) {
-
- unsigned int h[5];
-
- undoRMD160FinalRound(targets[i].h, h);
-
- for(int j = 0; j < 5; j++) {
- unsigned int idx = h[j] & mask;
-
- filter[idx / 32] |= (0x01 << (idx % 32));
- }
-
- }
-}
-
-void CudaHashLookup::initializeBloomFilter64(const std::vector &targets, unsigned int *filter, unsigned long long mask)
-{
- for(unsigned int k = 0; k < targets.size(); k++) {
-
- unsigned int hash[5];
-
- unsigned long long idx[5];
-
- undoRMD160FinalRound(targets[k].h, hash);
-
- idx[0] = ((unsigned long long)hash[0] << 32 | hash[1]) & mask;
- idx[1] = ((unsigned long long)hash[2] << 32 | hash[3]) & mask;
- idx[2] = ((unsigned long long)(hash[0]^hash[1]) << 32 | (hash[1]^hash[2])) & mask;
- idx[3] = ((unsigned long long)(hash[2]^hash[3]) << 32 | (hash[3] ^ hash[4])) & mask;
- idx[4] = ((unsigned long long)(hash[0]^hash[3]) << 32 | (hash[1]^hash[3])) & mask;
-
- for(int i = 0; i < 5; i++) {
-
- filter[idx[i] / 32] |= (0x01 << (idx[i] % 32));
- }
- }
-}
-
-/**
-Populates the bloom filter with the target hashes
-*/
-cudaError_t CudaHashLookup::setTargetBloomFilter(const std::vector &targets)
-{
- unsigned int bloomFilterBits = getOptimalBloomFilterBits(1.0e-9, targets.size());
-
- unsigned long long bloomFilterSizeWords = (unsigned long long)1 << (bloomFilterBits - 5);
- unsigned long long bloomFilterBytes = (unsigned long long)1 << (bloomFilterBits - 3);
- unsigned long long bloomFilterMask = (((unsigned long long)1 << bloomFilterBits) - 1);
-
- Logger::log(LogLevel::Info, "Allocating bloom filter (" + util::format("%.1f", (double)bloomFilterBytes/(double)(1024*1024)) + "MB)");
-
- unsigned int *filter = NULL;
-
- try {
- filter = new unsigned int[bloomFilterSizeWords];
- } catch(std::bad_alloc) {
- Logger::log(LogLevel::Error, "Out of system memory");
-
- return cudaErrorMemoryAllocation;
- }
-
- cudaError_t err = cudaMalloc(&_bloomFilterPtr, bloomFilterBytes);
-
- if(err) {
- Logger::log(LogLevel::Error, "Device error: " + std::string(cudaGetErrorString(err)));
- delete[] filter;
- return err;
- }
-
- memset(filter, 0, sizeof(unsigned int) * bloomFilterSizeWords);
- if(bloomFilterBits > 32) {
- initializeBloomFilter64(targets, filter, bloomFilterMask);
- } else {
- initializeBloomFilter(targets, filter, (unsigned int)bloomFilterMask);
- }
-
- // Copy to device
- err = cudaMemcpy(_bloomFilterPtr, filter, sizeof(unsigned int) * bloomFilterSizeWords, cudaMemcpyHostToDevice);
- if(err) {
- cudaFree(_bloomFilterPtr);
- _bloomFilterPtr = NULL;
- delete[] filter;
- return err;
- }
-
- // Copy device memory pointer to constant memory
- err = cudaMemcpyToSymbol(_BLOOM_FILTER, &_bloomFilterPtr, sizeof(unsigned int *));
- if(err) {
- cudaFree(_bloomFilterPtr);
- _bloomFilterPtr = NULL;
- delete[] filter;
- return err;
- }
-
- // Copy device memory pointer to constant memory
- if(bloomFilterBits <= 32) {
- err = cudaMemcpyToSymbol(_BLOOM_FILTER_MASK, &bloomFilterMask, sizeof(unsigned int *));
- if(err) {
- cudaFree(_bloomFilterPtr);
- _bloomFilterPtr = NULL;
- delete[] filter;
- return err;
- }
- } else {
- err = cudaMemcpyToSymbol(_BLOOM_FILTER_MASK64, &bloomFilterMask, sizeof(unsigned long long *));
- if(err) {
- cudaFree(_bloomFilterPtr);
- _bloomFilterPtr = NULL;
- delete[] filter;
- return err;
- }
- }
-
- unsigned int useBloomFilter = bloomFilterBits <= 32 ? 1 : 2;
-
- err = cudaMemcpyToSymbol(_USE_BLOOM_FILTER, &useBloomFilter, sizeof(unsigned int));
-
- delete[] filter;
-
- return err;
-}
-
-/**
-*Copies the target hashes to either constant memory, or the bloom filter depending
-on how many targets there are
-*/
-cudaError_t CudaHashLookup::setTargets(const std::vector &targets)
-{
- cleanup();
-
- if(targets.size() <= MAX_TARGETS_CONSTANT_MEM) {
- return setTargetConstantMemory(targets);
- } else {
- return setTargetBloomFilter(targets);
- }
-}
-
-void CudaHashLookup::cleanup()
-{
- if(_bloomFilterPtr != NULL) {
- cudaFree(_bloomFilterPtr);
- _bloomFilterPtr = NULL;
- }
-}
-
-__device__ bool checkBloomFilter(const unsigned int hash[5])
-{
- bool foundMatch = true;
-
- unsigned int mask = _BLOOM_FILTER_MASK[0];
- unsigned int *bloomFilter = _BLOOM_FILTER[0];
-
- for(int i = 0; i < 5; i++) {
- unsigned int idx = hash[i] & mask;
-
- unsigned int f = bloomFilter[idx / 32];
-
- if((f & (0x01 << (idx % 32))) == 0) {
- foundMatch = false;
- }
- }
-
- return foundMatch;
-}
-
-__device__ bool checkBloomFilter64(const unsigned int hash[5])
-{
- bool foundMatch = true;
-
- unsigned long long mask = _BLOOM_FILTER_MASK64[0];
- unsigned int *bloomFilter = _BLOOM_FILTER[0];
- unsigned long long idx[5];
-
- idx[0] = ((unsigned long long)hash[0] << 32 | hash[1]) & mask;
- idx[1] = ((unsigned long long)hash[2] << 32 | hash[3]) & mask;
- idx[2] = ((unsigned long long)(hash[0] ^ hash[1]) << 32 | (hash[1] ^ hash[2])) & mask;
- idx[3] = ((unsigned long long)(hash[2] ^ hash[3]) << 32 | (hash[3] ^ hash[4])) & mask;
- idx[4] = ((unsigned long long)(hash[0] ^ hash[3]) << 32 | (hash[1] ^ hash[3])) & mask;
-
- for(int i = 0; i < 5; i++) {
- unsigned int f = bloomFilter[idx[i] / 32];
-
- if((f & (0x01 << (idx[i] % 32))) == 0) {
- foundMatch = false;
- }
- }
-
- return foundMatch;
-}
-
-
-__device__ bool checkHash(const unsigned int hash[5])
-{
- bool foundMatch = false;
-
- if(*_USE_BLOOM_FILTER == 1) {
- return checkBloomFilter(hash);
- } else if(*_USE_BLOOM_FILTER == 2) {
- return checkBloomFilter64(hash);
- } else {
- for(int j = 0; j < *_NUM_TARGET_HASHES; j++) {
- bool equal = true;
- for(int i = 0; i < 5; i++) {
- equal &= (hash[i] == _TARGET_HASH[j][i]);
- }
-
- foundMatch |= equal;
- }
- }
-
- return foundMatch;
-}
\ No newline at end of file
diff --git a/CudaKeySearchDevice/CudaHashLookup.cuh b/CudaKeySearchDevice/CudaHashLookup.cuh
deleted file mode 100644
index 83b3982..0000000
--- a/CudaKeySearchDevice/CudaHashLookup.cuh
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ADDRESS_LOOKUP_CUH
-#define _ADDRESS_LOOKUP_CUH
-
-__device__ bool checkHash(const unsigned int hash[5]);
-
-#endif
\ No newline at end of file
diff --git a/CudaKeySearchDevice/CudaHashLookup.h b/CudaKeySearchDevice/CudaHashLookup.h
deleted file mode 100644
index 8e8d87e..0000000
--- a/CudaKeySearchDevice/CudaHashLookup.h
+++ /dev/null
@@ -1,38 +0,0 @@
-#ifndef _HASH_LOOKUP_HOST_H
-#define _HASH_LOOKUP_HOST_H
-
-#include
-
-class CudaHashLookup {
-
-private:
- unsigned int *_bloomFilterPtr;
-
- cudaError_t setTargetBloomFilter(const std::vector &targets);
-
- cudaError_t setTargetConstantMemory(const std::vector &targets);
-
- unsigned int getOptimalBloomFilterBits(double p, size_t n);
-
- void cleanup();
-
- void initializeBloomFilter(const std::vector &targets, unsigned int *filter, unsigned int mask);
-
- void initializeBloomFilter64(const std::vector &targets, unsigned int *filter, unsigned long long mask);
-
-public:
-
- CudaHashLookup()
- {
- _bloomFilterPtr = NULL;
- }
-
- ~CudaHashLookup()
- {
- cleanup();
- }
-
- cudaError_t setTargets(const std::vector &targets);
-};
-
-#endif
\ No newline at end of file
diff --git a/CudaKeySearchDevice/CudaKeySearchDevice.cpp b/CudaKeySearchDevice/CudaKeySearchDevice.cpp
deleted file mode 100644
index aad1fd3..0000000
--- a/CudaKeySearchDevice/CudaKeySearchDevice.cpp
+++ /dev/null
@@ -1,316 +0,0 @@
-#include "CudaKeySearchDevice.h"
-#include "Logger.h"
-#include "util.h"
-#include "cudabridge.h"
-#include "AddressUtil.h"
-
-void CudaKeySearchDevice::cudaCall(cudaError_t err)
-{
- if(err) {
- std::string errStr = cudaGetErrorString(err);
-
- throw KeySearchException(errStr);
- }
-}
-
-CudaKeySearchDevice::CudaKeySearchDevice(int device, int threads, int pointsPerThread, int blocks)
-{
- cuda::CudaDeviceInfo info;
- try {
- info = cuda::getDeviceInfo(device);
- _deviceName = info.name;
- } catch(cuda::CudaException ex) {
- throw KeySearchException(ex.msg);
- }
-
- if(threads <= 0 || threads % 32 != 0) {
- throw KeySearchException("The number of threads must be a multiple of 32");
- }
-
- if(pointsPerThread <= 0) {
- throw KeySearchException("At least 1 point per thread required");
- }
-
- // Specifying blocks on the commandline is depcreated but still supported. If there is no value for
- // blocks, devide the threads evenly among the multi-processors
- if(blocks == 0) {
- if(threads % info.mpCount != 0) {
- throw KeySearchException("The number of threads must be a multiple of " + util::format("%d", info.mpCount));
- }
-
- _threads = threads / info.mpCount;
-
- _blocks = info.mpCount;
-
- while(_threads > 512) {
- _threads /= 2;
- _blocks *= 2;
- }
- } else {
- _threads = threads;
- _blocks = blocks;
- }
-
- _iterations = 0;
-
- _device = device;
-
- _pointsPerThread = pointsPerThread;
-}
-
-void CudaKeySearchDevice::init(const secp256k1::uint256 &start, int compression, const secp256k1::uint256 &stride)
-{
- if(start.cmp(secp256k1::N) >= 0) {
- throw KeySearchException("Starting key is out of range");
- }
-
- _startExponent = start;
-
- _compression = compression;
-
- _stride = stride;
-
- cudaCall(cudaSetDevice(_device));
-
- // Block on kernel calls
- cudaCall(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
-
- // Use a larger portion of shared memory for L1 cache
- cudaCall(cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
-
- generateStartingPoints();
-
- cudaCall(allocateChainBuf(_threads * _blocks * _pointsPerThread));
-
- // Set the incrementor
- secp256k1::ecpoint g = secp256k1::G();
- secp256k1::ecpoint p = secp256k1::multiplyPoint(secp256k1::uint256((uint64_t)_threads * _blocks * _pointsPerThread) * _stride, g);
-
- cudaCall(_resultList.init(sizeof(CudaDeviceResult), 16));
-
- cudaCall(setIncrementorPoint(p.x, p.y));
-}
-
-
-void CudaKeySearchDevice::generateStartingPoints()
-{
- uint64_t totalPoints = (uint64_t)_pointsPerThread * _threads * _blocks;
- uint64_t totalMemory = totalPoints * 40;
-
- std::vector exponents;
-
- Logger::log(LogLevel::Info, "Generating " + util::formatThousands(totalPoints) + " starting points (" + util::format("%.1f", (double)totalMemory / (double)(1024 * 1024)) + "MB)");
-
- // Generate key pairs for k, k+1, k+2 ... k +
- secp256k1::uint256 privKey = _startExponent;
-
- exponents.push_back(privKey);
-
- for(uint64_t i = 1; i < totalPoints; i++) {
- privKey = privKey.add(_stride);
- exponents.push_back(privKey);
- }
-
- cudaCall(_deviceKeys.init(_blocks, _threads, _pointsPerThread, exponents));
-
- // Show progress in 10% increments
- double pct = 10.0;
- for(int i = 1; i <= 256; i++) {
- cudaCall(_deviceKeys.doStep());
-
- if(((double)i / 256.0) * 100.0 >= pct) {
- Logger::log(LogLevel::Info, util::format("%.1f%%", pct));
- pct += 10.0;
- }
- }
-
- Logger::log(LogLevel::Info, "Done");
-
- _deviceKeys.clearPrivateKeys();
-}
-
-
-void CudaKeySearchDevice::setTargets(const std::set &targets)
-{
- _targets.clear();
-
- for(std::set::iterator i = targets.begin(); i != targets.end(); ++i) {
- hash160 h(i->value);
- _targets.push_back(h);
- }
-
- cudaCall(_targetLookup.setTargets(_targets));
-}
-
-void CudaKeySearchDevice::doStep()
-{
- uint64_t numKeys = (uint64_t)_blocks * _threads * _pointsPerThread;
-
- try {
- if(_iterations < 2 && _startExponent.cmp(numKeys) <= 0) {
- callKeyFinderKernel(_blocks, _threads, _pointsPerThread, true, _compression);
- } else {
- callKeyFinderKernel(_blocks, _threads, _pointsPerThread, false, _compression);
- }
- } catch(cuda::CudaException ex) {
- throw KeySearchException(ex.msg);
- }
-
- getResultsInternal();
-
- _iterations++;
-}
-
-uint64_t CudaKeySearchDevice::keysPerStep()
-{
- return (uint64_t)_blocks * _threads * _pointsPerThread;
-}
-
-std::string CudaKeySearchDevice::getDeviceName()
-{
- return _deviceName;
-}
-
-void CudaKeySearchDevice::getMemoryInfo(uint64_t &freeMem, uint64_t &totalMem)
-{
- cudaCall(cudaMemGetInfo(&freeMem, &totalMem));
-}
-
-void CudaKeySearchDevice::removeTargetFromList(const unsigned int hash[5])
-{
- size_t count = _targets.size();
-
- while(count) {
- if(memcmp(hash, _targets[count - 1].h, 20) == 0) {
- _targets.erase(_targets.begin() + count - 1);
- return;
- }
- count--;
- }
-}
-
-bool CudaKeySearchDevice::isTargetInList(const unsigned int hash[5])
-{
- size_t count = _targets.size();
-
- while(count) {
- if(memcmp(hash, _targets[count - 1].h, 20) == 0) {
- return true;
- }
- count--;
- }
-
- return false;
-}
-
-uint32_t CudaKeySearchDevice::getPrivateKeyOffset(int thread, int block, int idx)
-{
- // Total number of threads
- int totalThreads = _blocks * _threads;
-
- int base = idx * totalThreads;
-
- // Global ID of the current thread
- int threadId = block * _threads + thread;
-
- return base + threadId;
-}
-
-void CudaKeySearchDevice::getResultsInternal()
-{
- int count = _resultList.size();
- int actualCount = 0;
- if(count == 0) {
- return;
- }
-
- unsigned char *ptr = new unsigned char[count * sizeof(CudaDeviceResult)];
-
- _resultList.read(ptr, count);
-
- for(int i = 0; i < count; i++) {
- struct CudaDeviceResult *rPtr = &((struct CudaDeviceResult *)ptr)[i];
-
- // might be false-positive
- if(!isTargetInList(rPtr->digest)) {
- continue;
- }
- actualCount++;
-
- KeySearchResult minerResult;
-
- // Calculate the private key based on the number of iterations and the current thread
- secp256k1::uint256 offset = (secp256k1::uint256((uint64_t)_blocks * _threads * _pointsPerThread * _iterations) + secp256k1::uint256(getPrivateKeyOffset(rPtr->thread, rPtr->block, rPtr->idx))) * _stride;
- secp256k1::uint256 privateKey = secp256k1::addModN(_startExponent, offset);
-
- minerResult.privateKey = privateKey;
- minerResult.compressed = rPtr->compressed;
-
- memcpy(minerResult.hash, rPtr->digest, 20);
-
- minerResult.publicKey = secp256k1::ecpoint(secp256k1::uint256(rPtr->x, secp256k1::uint256::BigEndian), secp256k1::uint256(rPtr->y, secp256k1::uint256::BigEndian));
-
- removeTargetFromList(rPtr->digest);
-
- _results.push_back(minerResult);
- }
-
- delete[] ptr;
-
- _resultList.clear();
-
- // Reload the bloom filters
- if(actualCount) {
- cudaCall(_targetLookup.setTargets(_targets));
- }
-}
-
-// Verify a private key produces the public key and hash
-bool CudaKeySearchDevice::verifyKey(const secp256k1::uint256 &privateKey, const secp256k1::ecpoint &publicKey, const unsigned int hash[5], bool compressed)
-{
- secp256k1::ecpoint g = secp256k1::G();
-
- secp256k1::ecpoint p = secp256k1::multiplyPoint(privateKey, g);
-
- if(!(p == publicKey)) {
- return false;
- }
-
- unsigned int xWords[8];
- unsigned int yWords[8];
-
- p.x.exportWords(xWords, 8, secp256k1::uint256::BigEndian);
- p.y.exportWords(yWords, 8, secp256k1::uint256::BigEndian);
-
- unsigned int digest[5];
- if(compressed) {
- Hash::hashPublicKeyCompressed(xWords, yWords, digest);
- } else {
- Hash::hashPublicKey(xWords, yWords, digest);
- }
-
- for(int i = 0; i < 5; i++) {
- if(digest[i] != hash[i]) {
- return false;
- }
- }
-
- return true;
-}
-
-size_t CudaKeySearchDevice::getResults(std::vector &resultsOut)
-{
- for(int i = 0; i < _results.size(); i++) {
- resultsOut.push_back(_results[i]);
- }
- _results.clear();
-
- return resultsOut.size();
-}
-
-secp256k1::uint256 CudaKeySearchDevice::getNextKey()
-{
- uint64_t totalPoints = (uint64_t)_pointsPerThread * _threads * _blocks;
-
- return _startExponent + secp256k1::uint256(totalPoints) * _iterations * _stride;
-}
\ No newline at end of file
diff --git a/CudaKeySearchDevice/CudaKeySearchDevice.cu b/CudaKeySearchDevice/CudaKeySearchDevice.cu
deleted file mode 100644
index cbb79ad..0000000
--- a/CudaKeySearchDevice/CudaKeySearchDevice.cu
+++ /dev/null
@@ -1,261 +0,0 @@
-#include
-#include
-#include
-#include "KeySearchTypes.h"
-#include "CudaKeySearchDevice.h"
-#include "ptx.cuh"
-#include "secp256k1.cuh"
-
-#include "sha256.cuh"
-#include "ripemd160.cuh"
-
-#include "secp256k1.h"
-
-#include "CudaHashLookup.cuh"
-#include "CudaAtomicList.cuh"
-#include "CudaDeviceKeys.cuh"
-
-__constant__ unsigned int _INC_X[8];
-
-__constant__ unsigned int _INC_Y[8];
-
-__constant__ unsigned int *_CHAIN[1];
-
-static unsigned int *_chainBufferPtr = NULL;
-
-
-__device__ void doRMD160FinalRound(const unsigned int hIn[5], unsigned int hOut[5])
-{
- const unsigned int iv[5] = {
- 0x67452301,
- 0xefcdab89,
- 0x98badcfe,
- 0x10325476,
- 0xc3d2e1f0
- };
-
- for(int i = 0; i < 5; i++) {
- hOut[i] = endian(hIn[i] + iv[(i + 1) % 5]);
- }
-}
-
-
-/**
- * Allocates device memory for storing the multiplication chain used in
- the batch inversion operation
- */
-cudaError_t allocateChainBuf(unsigned int count)
-{
- cudaError_t err = cudaMalloc(&_chainBufferPtr, count * sizeof(unsigned int) * 8);
-
- if(err) {
- return err;
- }
-
- err = cudaMemcpyToSymbol(_CHAIN, &_chainBufferPtr, sizeof(unsigned int *));
- if(err) {
- cudaFree(_chainBufferPtr);
- }
-
- return err;
-}
-
-void cleanupChainBuf()
-{
- if(_chainBufferPtr != NULL) {
- cudaFree(_chainBufferPtr);
- _chainBufferPtr = NULL;
- }
-}
-
-/**
- *Sets the EC point which all points will be incremented by
- */
-cudaError_t setIncrementorPoint(const secp256k1::uint256 &x, const secp256k1::uint256 &y)
-{
- unsigned int xWords[8];
- unsigned int yWords[8];
-
- x.exportWords(xWords, 8, secp256k1::uint256::BigEndian);
- y.exportWords(yWords, 8, secp256k1::uint256::BigEndian);
-
- cudaError_t err = cudaMemcpyToSymbol(_INC_X, xWords, sizeof(unsigned int) * 8);
- if(err) {
- return err;
- }
-
- return cudaMemcpyToSymbol(_INC_Y, yWords, sizeof(unsigned int) * 8);
-}
-
-
-
-__device__ void hashPublicKey(const unsigned int *x, const unsigned int *y, unsigned int *digestOut)
-{
- unsigned int hash[8];
-
- sha256PublicKey(x, y, hash);
-
- // Swap to little-endian
- for(int i = 0; i < 8; i++) {
- hash[i] = endian(hash[i]);
- }
-
- ripemd160sha256NoFinal(hash, digestOut);
-}
-
-__device__ void hashPublicKeyCompressed(const unsigned int *x, unsigned int yParity, unsigned int *digestOut)
-{
- unsigned int hash[8];
-
- sha256PublicKeyCompressed(x, yParity, hash);
-
- // Swap to little-endian
- for(int i = 0; i < 8; i++) {
- hash[i] = endian(hash[i]);
- }
-
- ripemd160sha256NoFinal(hash, digestOut);
-}
-
-
-__device__ void setResultFound(int idx, bool compressed, unsigned int x[8], unsigned int y[8], unsigned int digest[5])
-{
- CudaDeviceResult r;
-
- r.block = blockIdx.x;
- r.thread = threadIdx.x;
- r.idx = idx;
- r.compressed = compressed;
-
- for(int i = 0; i < 8; i++) {
- r.x[i] = x[i];
- r.y[i] = y[i];
- }
-
- doRMD160FinalRound(digest, r.digest);
-
- atomicListAdd(&r, sizeof(r));
-}
-
-__device__ void doIteration(int pointsPerThread, int compression)
-{
- unsigned int *chain = _CHAIN[0];
- unsigned int *xPtr = ec::getXPtr();
- unsigned int *yPtr = ec::getYPtr();
-
- // Multiply together all (_Gx - x) and then invert
- unsigned int inverse[8] = {0,0,0,0,0,0,0,1};
- for(int i = 0; i < pointsPerThread; i++) {
- unsigned int x[8];
-
- unsigned int digest[5];
-
- readInt(xPtr, i, x);
-
- if(compression == PointCompressionType::UNCOMPRESSED || compression == PointCompressionType::BOTH) {
- unsigned int y[8];
- readInt(yPtr, i, y);
-
- hashPublicKey(x, y, digest);
-
- if(checkHash(digest)) {
- setResultFound(i, false, x, y, digest);
- }
- }
-
- if(compression == PointCompressionType::COMPRESSED || compression == PointCompressionType::BOTH) {
- hashPublicKeyCompressed(x, readIntLSW(yPtr, i), digest);
-
- if(checkHash(digest)) {
- unsigned int y[8];
- readInt(yPtr, i, y);
- setResultFound(i, true, x, y, digest);
- }
- }
-
- beginBatchAdd(_INC_X, x, chain, i, i, inverse);
- }
-
- doBatchInverse(inverse);
-
- for(int i = pointsPerThread - 1; i >= 0; i--) {
-
- unsigned int newX[8];
- unsigned int newY[8];
-
- completeBatchAdd(_INC_X, _INC_Y, xPtr, yPtr, i, i, chain, inverse, newX, newY);
-
- writeInt(xPtr, i, newX);
- writeInt(yPtr, i, newY);
- }
-}
-
-__device__ void doIterationWithDouble(int pointsPerThread, int compression)
-{
- unsigned int *chain = _CHAIN[0];
- unsigned int *xPtr = ec::getXPtr();
- unsigned int *yPtr = ec::getYPtr();
-
- // Multiply together all (_Gx - x) and then invert
- unsigned int inverse[8] = {0,0,0,0,0,0,0,1};
- for(int i = 0; i < pointsPerThread; i++) {
- unsigned int x[8];
-
- unsigned int digest[5];
-
- readInt(xPtr, i, x);
-
- // uncompressed
- if(compression == PointCompressionType::UNCOMPRESSED || compression == PointCompressionType::BOTH) {
- unsigned int y[8];
- readInt(yPtr, i, y);
- hashPublicKey(x, y, digest);
-
- if(checkHash(digest)) {
- setResultFound(i, false, x, y, digest);
- }
- }
-
- // compressed
- if(compression == PointCompressionType::COMPRESSED || compression == PointCompressionType::BOTH) {
-
- hashPublicKeyCompressed(x, readIntLSW(yPtr, i), digest);
-
- if(checkHash(digest)) {
-
- unsigned int y[8];
- readInt(yPtr, i, y);
-
- setResultFound(i, true, x, y, digest);
- }
- }
-
- beginBatchAddWithDouble(_INC_X, _INC_Y, xPtr, chain, i, i, inverse);
- }
-
- doBatchInverse(inverse);
-
- for(int i = pointsPerThread - 1; i >= 0; i--) {
-
- unsigned int newX[8];
- unsigned int newY[8];
-
- completeBatchAddWithDouble(_INC_X, _INC_Y, xPtr, yPtr, i, i, chain, inverse, newX, newY);
-
- writeInt(xPtr, i, newX);
- writeInt(yPtr, i, newY);
- }
-}
-
-/**
-* Performs a single iteration
-*/
-__global__ void keyFinderKernel(int points, int compression)
-{
- doIteration(points, compression);
-}
-
-__global__ void keyFinderKernelWithDouble(int points, int compression)
-{
- doIterationWithDouble(points, compression);
-}
\ No newline at end of file
diff --git a/CudaKeySearchDevice/CudaKeySearchDevice.h b/CudaKeySearchDevice/CudaKeySearchDevice.h
deleted file mode 100644
index fb8d194..0000000
--- a/CudaKeySearchDevice/CudaKeySearchDevice.h
+++ /dev/null
@@ -1,91 +0,0 @@
-#ifndef _CUDA_KEY_SEARCH_DEVICE
-#define _CUDA_KEY_SEARCH_DEVICE
-
-#include "KeySearchDevice.h"
-#include
-#include
-#include "secp256k1.h"
-#include "CudaDeviceKeys.h"
-#include "CudaHashLookup.h"
-#include "CudaAtomicList.h"
-#include "cudaUtil.h"
-
-// Structures that exist on both host and device side
-struct CudaDeviceResult {
- int thread;
- int block;
- int idx;
- bool compressed;
- unsigned int x[8];
- unsigned int y[8];
- unsigned int digest[5];
-};
-
-class CudaKeySearchDevice : public KeySearchDevice {
-
-private:
-
- int _device;
-
- int _blocks;
-
- int _threads;
-
- int _pointsPerThread;
-
- int _compression;
-
- std::vector _results;
-
- std::string _deviceName;
-
- secp256k1::uint256 _startExponent;
-
- uint64_t _iterations;
-
- void cudaCall(cudaError_t err);
-
- void generateStartingPoints();
-
- CudaDeviceKeys _deviceKeys;
-
- CudaAtomicList _resultList;
-
- CudaHashLookup _targetLookup;
-
- void getResultsInternal();
-
- std::vector _targets;
-
- bool isTargetInList(const unsigned int hash[5]);
-
- void removeTargetFromList(const unsigned int hash[5]);
-
- uint32_t getPrivateKeyOffset(int thread, int block, int point);
-
- secp256k1::uint256 _stride;
-
- bool verifyKey(const secp256k1::uint256 &privateKey, const secp256k1::ecpoint &publicKey, const unsigned int hash[5], bool compressed);
-
-public:
-
- CudaKeySearchDevice(int device, int threads, int pointsPerThread, int blocks = 0);
-
- virtual void init(const secp256k1::uint256 &start, int compression, const secp256k1::uint256 &stride);
-
- virtual void doStep();
-
- virtual void setTargets(const std::set &targets);
-
- virtual size_t getResults(std::vector &results);
-
- virtual uint64_t keysPerStep();
-
- virtual std::string getDeviceName();
-
- virtual void getMemoryInfo(uint64_t &freeMem, uint64_t &totalMem);
-
- virtual secp256k1::uint256 getNextKey();
-};
-
-#endif
\ No newline at end of file
diff --git a/CudaKeySearchDevice/CudaKeySearchDevice.vcxproj b/CudaKeySearchDevice/CudaKeySearchDevice.vcxproj
deleted file mode 100644
index 490c53e..0000000
--- a/CudaKeySearchDevice/CudaKeySearchDevice.vcxproj
+++ /dev/null
@@ -1,119 +0,0 @@
-
-
-
-
- Debug
- x64
-
-
- Release
- x64
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- {150af404-1f80-4a13-855b-4383c4a3326f}
-
-
-
- {CCA3D02C-5E5A-4A24-B34B-5961DFA93946}
- CudaKeySearchDevice
- 10.0
-
-
-
- StaticLibrary
- true
- MultiByte
- v142
-
-
- StaticLibrary
- false
- true
- MultiByte
- v142
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- true
-
-
-
- Level4
- Disabled
- WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
- %(AdditionalUsingDirectories)
- $(SolutionDir)\secp256k1lib;$(SolutionDir)\KeyFinderLib;$(SolutionDir)\Logger;$(SolutionDir)\Util;$(SolutionDir)\CudaMath;$(SolutionDir)\cudaUtil;$(SolutionDir)\AddressUtil;$(CUDA_INCLUDE)
-
-
- true
- Console
- cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
-
-
- 64
- true
- %(CodeGeneration)
-
-
-
-
- Level4
- MaxSpeed
- true
- true
- WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
- %(AdditionalUsingDirectories)
- $(SolutionDir)\secp256k1lib;$(SolutionDir)\KeyFinderLib;$(SolutionDir)\Logger;$(SolutionDir)\Util;$(SolutionDir)\CudaMath;$(SolutionDir)\cudaUtil;$(SolutionDir)\AddressUtil;$(CUDA_INCLUDE)
-
-
- true
- true
- true
- Console
- cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
-
-
- 64
- true
- %(CodeGeneration)
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/CudaKeySearchDevice/Makefile b/CudaKeySearchDevice/Makefile
deleted file mode 100644
index 5782c63..0000000
--- a/CudaKeySearchDevice/Makefile
+++ /dev/null
@@ -1,22 +0,0 @@
-NAME=CudaKeySearchDevice
-CPPSRC:=$(wildcard *.cpp)
-CUSRC:=$(wildcard *.cu)
-
-all: cuda
-
-cuda:
- for file in ${CPPSRC} ; do\
- ${CXX} -c $$file ${INCLUDE} -I${CUDA_INCLUDE} ${CXXFLAGS};\
- done
-
- for file in ${CUSRC} ; do\
- ${NVCC} -c $$file -o $$file".o" ${NVCCFLAGS} -rdc=true ${INCLUDE} -I${CUDA_INCLUDE} -I${CUDA_MATH};\
- done
-
- ${NVCC} -dlink -o cuda_libs.o *.cu.o -lcudadevrt -lcudart
-
- ar rvs ${LIBDIR}/lib$(NAME).a *.o
-
-clean:
- rm -f *.o *.cu.o
- rm -f *.a
\ No newline at end of file
diff --git a/CudaKeySearchDevice/cudabridge.cu b/CudaKeySearchDevice/cudabridge.cu
deleted file mode 100644
index 33325d9..0000000
--- a/CudaKeySearchDevice/cudabridge.cu
+++ /dev/null
@@ -1,33 +0,0 @@
-#include "cudabridge.h"
-
-
-__global__ void keyFinderKernel(int points, int compression);
-__global__ void keyFinderKernelWithDouble(int points, int compression);
-
-void callKeyFinderKernel(int blocks, int threads, int points, bool useDouble, int compression)
-{
- if(useDouble) {
- keyFinderKernelWithDouble <<>>(points, compression);
- } else {
- keyFinderKernel <<>> (points, compression);
- }
- waitForKernel();
-}
-
-
-void waitForKernel()
-{
- // Check for kernel launch error
- cudaError_t err = cudaGetLastError();
-
- if(err != cudaSuccess) {
- throw cuda::CudaException(err);
- }
-
- // Wait for kernel to complete
- err = cudaDeviceSynchronize();
- fflush(stdout);
- if(err != cudaSuccess) {
- throw cuda::CudaException(err);
- }
-}
\ No newline at end of file
diff --git a/CudaKeySearchDevice/cudabridge.h b/CudaKeySearchDevice/cudabridge.h
deleted file mode 100644
index eaafe3a..0000000
--- a/CudaKeySearchDevice/cudabridge.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#ifndef _BRIDGE_H
-#define _BRIDGE_H
-
-#include
-#include
-#include
-#include "cudaUtil.h"
-#include "secp256k1.h"
-
-
-void callKeyFinderKernel(int blocks, int threads, int points, bool useDouble, int compression);
-
-void waitForKernel();
-
-cudaError_t setIncrementorPoint(const secp256k1::uint256 &x, const secp256k1::uint256 &y);
-cudaError_t allocateChainBuf(unsigned int count);
-void cleanupChainBuf();
-
-#endif
\ No newline at end of file
diff --git a/KeyFinder/ConfigFile.cpp b/KeyFinder/ConfigFile.cpp
index 83098ae..4cc7fc2 100644
--- a/KeyFinder/ConfigFile.cpp
+++ b/KeyFinder/ConfigFile.cpp
@@ -43,4 +43,4 @@ std::map ConfigFileReader::read()
}
return entries;
-}
\ No newline at end of file
+}
diff --git a/KeyFinder/ConfigFile.h b/KeyFinder/ConfigFile.h
index ed48116..312d21b 100644
--- a/KeyFinder/ConfigFile.h
+++ b/KeyFinder/ConfigFile.h
@@ -1,5 +1,5 @@
-#ifndef _CONFIG_FILE_H
-#define _CONFIG_FILE_H
+#ifndef CONFIG_FILE_H
+#define CONFIG_FILE_H
#include
#include
+
+
+ Level3
+ Disabled
+ true
+ true
+
+
Level3
@@ -154,6 +239,7 @@
true
OpenCL.lib;Shlwapi.lib;BCrypt.lib;%(AdditionalDependencies)
$(OPENCL_LIB);%(AdditionalLibraryDirectories)
+ Console
diff --git a/clMath/clMath.vcxproj b/clMath/clMath.vcxproj
index 5236bff..c434979 100644
--- a/clMath/clMath.vcxproj
+++ b/clMath/clMath.vcxproj
@@ -5,6 +5,14 @@
Debug
Win32
+
+ Performance Release
+ Win32
+
+
+ Performance Release
+ x64
+
Release
Win32
@@ -22,32 +30,46 @@
15.0
{83327841-C283-4D46-A873-97AC674C68AC}
clMath
- 10.0
+ 10.0.19041.0
Application
true
- v141
+ ClangCl
+ MultiByte
+
+
+ Application
+ false
+ ClangCl
MultiByte
Application
false
- v141
+ ClangCl
true
MultiByte
Application
true
- v142
+ ClangCl
MultiByte
+
+ Application
+ false
+ ClangCl
+ MultiByte
+ true
+ x64
+
Application
false
- v142
+ ClangCL
true
MultiByte
@@ -59,12 +81,18 @@
+
+
+
+
+
+
@@ -92,7 +120,7 @@
true
-
+
Level3
Disabled
@@ -100,6 +128,28 @@
true
+
+
+ EnableAllWarnings
+ Disabled
+ true
+ true
+
+
+
+
+ Level3
+ true
+ true
+ None
+ MaxSpeed
+ AnySuitable
+ true
+ Speed
+ true
+ true
+
+
Level3
diff --git a/clMath/ripemd160.cl b/clMath/ripemd160.cl
index 300ead1..97bd624 100644
--- a/clMath/ripemd160.cl
+++ b/clMath/ripemd160.cl
@@ -1,24 +1,17 @@
-#ifndef _RIPEMD160_CL
-#define _RIPEMD160_CL
+#ifndef RIPEMD160_CL
+#define RIPEMD160_CL
+#ifndef endian
+#define endian(x) ((x) << 24) | (((x) << 8) & 0x00ff0000) | (((x) >> 8) & 0x0000ff00) | ((x) >> 24)
+#endif
-__constant unsigned int _RIPEMD160_IV[5] = {
- 0x67452301,
- 0xefcdab89,
- 0x98badcfe,
- 0x10325476,
- 0xc3d2e1f0
+__constant unsigned int RIPEMD160_IV[5] = {
+ 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0,
};
-__constant unsigned int _K0 = 0x5a827999;
-__constant unsigned int _K1 = 0x6ed9eba1;
-__constant unsigned int _K2 = 0x8f1bbcdc;
-__constant unsigned int _K3 = 0xa953fd4e;
-
-__constant unsigned int _K4 = 0x7a6d76e9;
-__constant unsigned int _K5 = 0x6d703ef3;
-__constant unsigned int _K6 = 0x5c4dd124;
-__constant unsigned int _K7 = 0x50a28be6;
+__constant unsigned int K[8] = {
+ 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xa953fd4e, 0x7a6d76e9, 0x6d703ef3, 0x5c4dd124, 0x50a28be6
+};
#define rotl(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
@@ -38,22 +31,22 @@ __constant unsigned int _K7 = 0x50a28be6;
c = rotl((c), 10)
#define GG(a, b, c, d, e, x, s)\
- a += G((b), (c), (d)) + (x) + _K0;\
+ a += G((b), (c), (d)) + (x) + K[0];\
a = rotl((a), (s)) + (e);\
c = rotl((c), 10)
#define HH(a, b, c, d, e, x, s)\
- a += H((b), (c), (d)) + (x) + _K1;\
+ a += H((b), (c), (d)) + (x) + K[1];\
a = rotl((a), (s)) + (e);\
c = rotl((c), 10)
#define II(a, b, c, d, e, x, s)\
- a += I((b), (c), (d)) + (x) + _K2;\
+ a += I((b), (c), (d)) + (x) + K[2];\
a = rotl((a), (s)) + e;\
c = rotl((c), 10)
#define JJ(a, b, c, d, e, x, s)\
- a += J((b), (c), (d)) + (x) + _K3;\
+ a += J((b), (c), (d)) + (x) + K[3];\
a = rotl((a), (s)) + (e);\
c = rotl((c), 10)
@@ -63,432 +56,257 @@ __constant unsigned int _K7 = 0x50a28be6;
c = rotl((c), 10)
#define GGG(a, b, c, d, e, x, s)\
- a += G((b), (c), (d)) + x + _K4;\
+ a += G((b), (c), (d)) + x + K[4];\
a = rotl((a), (s)) + (e);\
c = rotl((c), 10)
#define HHH(a, b, c, d, e, x, s)\
- a += H((b), (c), (d)) + (x) + _K5;\
+ a += H((b), (c), (d)) + (x) + K[5];\
a = rotl((a), (s)) + (e);\
c = rotl((c), 10)
#define III(a, b, c, d, e, x, s)\
- a += I((b), (c), (d)) + (x) + _K6;\
+ a += I((b), (c), (d)) + (x) + K[6];\
a = rotl((a), (s)) + (e);\
c = rotl((c), 10)
#define JJJ(a, b, c, d, e, x, s)\
- a += J((b), (c), (d)) + (x) + _K7;\
+ a += J((b), (c), (d)) + (x) + K[7];\
a = rotl((a), (s)) + (e);\
c = rotl((c), 10)
-
-void ripemd160sha256(const unsigned int x[8], unsigned int digest[5])
+void ripemd160p1(const unsigned int x[8], unsigned int digest[5])
{
- unsigned int a1 = _RIPEMD160_IV[0];
- unsigned int b1 = _RIPEMD160_IV[1];
- unsigned int c1 = _RIPEMD160_IV[2];
- unsigned int d1 = _RIPEMD160_IV[3];
- unsigned int e1 = _RIPEMD160_IV[4];
-
- const unsigned int x8 = 0x00000080;
- const unsigned int x14 = 256;
+ __private unsigned int a = RIPEMD160_IV[0];
+ __private unsigned int b = RIPEMD160_IV[1];
+ __private unsigned int c = RIPEMD160_IV[2];
+ __private unsigned int d = RIPEMD160_IV[3];
+ __private unsigned int e = RIPEMD160_IV[4];
/* round 1 */
- FF(a1, b1, c1, d1, e1, x[0], 11);
- FF(e1, a1, b1, c1, d1, x[1], 14);
- FF(d1, e1, a1, b1, c1, x[2], 15);
- FF(c1, d1, e1, a1, b1, x[3], 12);
- FF(b1, c1, d1, e1, a1, x[4], 5);
- FF(a1, b1, c1, d1, e1, x[5], 8);
- FF(e1, a1, b1, c1, d1, x[6], 7);
- FF(d1, e1, a1, b1, c1, x[7], 9);
- FF(c1, d1, e1, a1, b1, x8, 11);
- FF(b1, c1, d1, e1, a1, 0, 13);
- FF(a1, b1, c1, d1, e1, 0, 14);
- FF(e1, a1, b1, c1, d1, 0, 15);
- FF(d1, e1, a1, b1, c1, 0, 6);
- FF(c1, d1, e1, a1, b1, 0, 7);
- FF(b1, c1, d1, e1, a1, x14, 9);
- FF(a1, b1, c1, d1, e1, 0, 8);
+ FF(a, b, c, d, e, x[0], 11);
+ FF(e, a, b, c, d, x[1], 14);
+ FF(d, e, a, b, c, x[2], 15);
+ FF(c, d, e, a, b, x[3], 12);
+ FF(b, c, d, e, a, x[4], 5);
+ FF(a, b, c, d, e, x[5], 8);
+ FF(e, a, b, c, d, x[6], 7);
+ FF(d, e, a, b, c, x[7], 9);
+ FF(c, d, e, a, b, 128, 11);
+ FF(b, c, d, e, a, 0, 13);
+ FF(a, b, c, d, e, 0, 14);
+ FF(e, a, b, c, d, 0, 15);
+ FF(d, e, a, b, c, 0, 6);
+ FF(c, d, e, a, b, 0, 7);
+ FF(b, c, d, e, a, 256, 9);
+ FF(a, b, c, d, e, 0, 8);
/* round 2 */
- GG(e1, a1, b1, c1, d1, x[7], 7);
- GG(d1, e1, a1, b1, c1, x[4], 6);
- GG(c1, d1, e1, a1, b1, 0, 8);
- GG(b1, c1, d1, e1, a1, x[1], 13);
- GG(a1, b1, c1, d1, e1, 0, 11);
- GG(e1, a1, b1, c1, d1, x[6], 9);
- GG(d1, e1, a1, b1, c1, 0, 7);
- GG(c1, d1, e1, a1, b1, x[3], 15);
- GG(b1, c1, d1, e1, a1, 0, 7);
- GG(a1, b1, c1, d1, e1, x[0], 12);
- GG(e1, a1, b1, c1, d1, 0, 15);
- GG(d1, e1, a1, b1, c1, x[5], 9);
- GG(c1, d1, e1, a1, b1, x[2], 11);
- GG(b1, c1, d1, e1, a1, x14, 7);
- GG(a1, b1, c1, d1, e1, 0, 13);
- GG(e1, a1, b1, c1, d1, x8, 12);
+ GG(e, a, b, c, d, x[7], 7);
+ GG(d, e, a, b, c, x[4], 6);
+ GG(c, d, e, a, b, 0, 8);
+ GG(b, c, d, e, a, x[1], 13);
+ GG(a, b, c, d, e, 0, 11);
+ GG(e, a, b, c, d, x[6], 9);
+ GG(d, e, a, b, c, 0, 7);
+ GG(c, d, e, a, b, x[3], 15);
+ GG(b, c, d, e, a, 0, 7);
+ GG(a, b, c, d, e, x[0], 12);
+ GG(e, a, b, c, d, 0, 15);
+ GG(d, e, a, b, c, x[5], 9);
+ GG(c, d, e, a, b, x[2], 11);
+ GG(b, c, d, e, a, 256, 7);
+ GG(a, b, c, d, e, 0, 13);
+ GG(e, a, b, c, d, 0x80, 12);
/* round 3 */
- HH(d1, e1, a1, b1, c1, x[3], 11);
- HH(c1, d1, e1, a1, b1, 0, 13);
- HH(b1, c1, d1, e1, a1, x14, 6);
- HH(a1, b1, c1, d1, e1, x[4], 7);
- HH(e1, a1, b1, c1, d1, 0, 14);
- HH(d1, e1, a1, b1, c1, 0, 9);
- HH(c1, d1, e1, a1, b1, x8, 13);
- HH(b1, c1, d1, e1, a1, x[1], 15);
- HH(a1, b1, c1, d1, e1, x[2], 14);
- HH(e1, a1, b1, c1, d1, x[7], 8);
- HH(d1, e1, a1, b1, c1, x[0], 13);
- HH(c1, d1, e1, a1, b1, x[6], 6);
- HH(b1, c1, d1, e1, a1, 0, 5);
- HH(a1, b1, c1, d1, e1, 0, 12);
- HH(e1, a1, b1, c1, d1, x[5], 7);
- HH(d1, e1, a1, b1, c1, 0, 5);
+ HH(d, e, a, b, c, x[3], 11);
+ HH(c, d, e, a, b, 0, 13);
+ HH(b, c, d, e, a, 256, 6);
+ HH(a, b, c, d, e, x[4], 7);
+ HH(e, a, b, c, d, 0, 14);
+ HH(d, e, a, b, c, 0, 9);
+ HH(c, d, e, a, b, 0x80, 13);
+ HH(b, c, d, e, a, x[1], 15);
+ HH(a, b, c, d, e, x[2], 14);
+ HH(e, a, b, c, d, x[7], 8);
+ HH(d, e, a, b, c, x[0], 13);
+ HH(c, d, e, a, b, x[6], 6);
+ HH(b, c, d, e, a, 0, 5);
+ HH(a, b, c, d, e, 0, 12);
+ HH(e, a, b, c, d, x[5], 7);
+ HH(d, e, a, b, c, 0, 5);
/* round 4 */
- II(c1, d1, e1, a1, b1, x[1], 11);
- II(b1, c1, d1, e1, a1, 0, 12);
- II(a1, b1, c1, d1, e1, 0, 14);
- II(e1, a1, b1, c1, d1, 0, 15);
- II(d1, e1, a1, b1, c1, x[0], 14);
- II(c1, d1, e1, a1, b1, x8, 15);
- II(b1, c1, d1, e1, a1, 0, 9);
- II(a1, b1, c1, d1, e1, x[4], 8);
- II(e1, a1, b1, c1, d1, 0, 9);
- II(d1, e1, a1, b1, c1, x[3], 14);
- II(c1, d1, e1, a1, b1, x[7], 5);
- II(b1, c1, d1, e1, a1, 0, 6);
- II(a1, b1, c1, d1, e1, x14, 8);
- II(e1, a1, b1, c1, d1, x[5], 6);
- II(d1, e1, a1, b1, c1, x[6], 5);
- II(c1, d1, e1, a1, b1, x[2], 12);
+ II(c, d, e, a, b, x[1], 11);
+ II(b, c, d, e, a, 0, 12);
+ II(a, b, c, d, e, 0, 14);
+ II(e, a, b, c, d, 0, 15);
+ II(d, e, a, b, c, x[0], 14);
+ II(c, d, e, a, b, 0x80, 15);
+ II(b, c, d, e, a, 0, 9);
+ II(a, b, c, d, e, x[4], 8);
+ II(e, a, b, c, d, 0, 9);
+ II(d, e, a, b, c, x[3], 14);
+ II(c, d, e, a, b, x[7], 5);
+ II(b, c, d, e, a, 0, 6);
+ II(a, b, c, d, e, 256, 8);
+ II(e, a, b, c, d, x[5], 6);
+ II(d, e, a, b, c, x[6], 5);
+ II(c, d, e, a, b, x[2], 12);
/* round 5 */
- JJ(b1, c1, d1, e1, a1, x[4], 9);
- JJ(a1, b1, c1, d1, e1, x[0], 15);
- JJ(e1, a1, b1, c1, d1, x[5], 5);
- JJ(d1, e1, a1, b1, c1, 0, 11);
- JJ(c1, d1, e1, a1, b1, x[7], 6);
- JJ(b1, c1, d1, e1, a1, 0, 8);
- JJ(a1, b1, c1, d1, e1, x[2], 13);
- JJ(e1, a1, b1, c1, d1, 0, 12);
- JJ(d1, e1, a1, b1, c1, x14, 5);
- JJ(c1, d1, e1, a1, b1, x[1], 12);
- JJ(b1, c1, d1, e1, a1, x[3], 13);
- JJ(a1, b1, c1, d1, e1, x8, 14);
- JJ(e1, a1, b1, c1, d1, 0, 11);
- JJ(d1, e1, a1, b1, c1, x[6], 8);
- JJ(c1, d1, e1, a1, b1, 0, 5);
- JJ(b1, c1, d1, e1, a1, 0, 6);
-
- unsigned int a2 = _RIPEMD160_IV[0];
- unsigned int b2 = _RIPEMD160_IV[1];
- unsigned int c2 = _RIPEMD160_IV[2];
- unsigned int d2 = _RIPEMD160_IV[3];
- unsigned int e2 = _RIPEMD160_IV[4];
+ JJ(b, c, d, e, a, x[4], 9);
+ JJ(a, b, c, d, e, x[0], 15);
+ JJ(e, a, b, c, d, x[5], 5);
+ JJ(d, e, a, b, c, 0, 11);
+ JJ(c, d, e, a, b, x[7], 6);
+ JJ(b, c, d, e, a, 0, 8);
+ JJ(a, b, c, d, e, x[2], 13);
+ JJ(e, a, b, c, d, 0, 12);
+ JJ(d, e, a, b, c, 256, 5);
+ JJ(c, d, e, a, b, x[1], 12);
+ JJ(b, c, d, e, a, x[3], 13);
+ JJ(a, b, c, d, e, 0x80, 14);
+ JJ(e, a, b, c, d, 0, 11);
+ JJ(d, e, a, b, c, x[6], 8);
+ JJ(c, d, e, a, b, 0, 5);
+ JJ(b, c, d, e, a, 0, 6);
+
+ digest[0] = c;
+ digest[1] = d;
+ digest[2] = e;
+ digest[3] = a;
+ digest[4] = b;
+}
+
+void ripemd160p2(const unsigned int x[8], unsigned int digest[5])
+{
+ __private unsigned int a = RIPEMD160_IV[0];
+ __private unsigned int b = RIPEMD160_IV[1];
+ __private unsigned int c = RIPEMD160_IV[2];
+ __private unsigned int d = RIPEMD160_IV[3];
+ __private unsigned int e = RIPEMD160_IV[4];
/* parallel round 1 */
- JJJ(a2, b2, c2, d2, e2, x[5], 8);
- JJJ(e2, a2, b2, c2, d2, x14, 9);
- JJJ(d2, e2, a2, b2, c2, x[7], 9);
- JJJ(c2, d2, e2, a2, b2, x[0], 11);
- JJJ(b2, c2, d2, e2, a2, 0, 13);
- JJJ(a2, b2, c2, d2, e2, x[2], 15);
- JJJ(e2, a2, b2, c2, d2, 0, 15);
- JJJ(d2, e2, a2, b2, c2, x[4], 5);
- JJJ(c2, d2, e2, a2, b2, 0, 7);
- JJJ(b2, c2, d2, e2, a2, x[6], 7);
- JJJ(a2, b2, c2, d2, e2, 0, 8);
- JJJ(e2, a2, b2, c2, d2, x8, 11);
- JJJ(d2, e2, a2, b2, c2, x[1], 14);
- JJJ(c2, d2, e2, a2, b2, 0, 14);
- JJJ(b2, c2, d2, e2, a2, x[3], 12);
- JJJ(a2, b2, c2, d2, e2, 0, 6);
+ JJJ(a, b, c, d, e, x[5], 8);
+ JJJ(e, a, b, c, d, 256, 9);
+ JJJ(d, e, a, b, c, x[7], 9);
+ JJJ(c, d, e, a, b, x[0], 11);
+ JJJ(b, c, d, e, a, 0, 13);
+ JJJ(a, b, c, d, e, x[2], 15);
+ JJJ(e, a, b, c, d, 0, 15);
+ JJJ(d, e, a, b, c, x[4], 5);
+ JJJ(c, d, e, a, b, 0, 7);
+ JJJ(b, c, d, e, a, x[6], 7);
+ JJJ(a, b, c, d, e, 0, 8);
+ JJJ(e, a, b, c, d, 0x80, 11);
+ JJJ(d, e, a, b, c, x[1], 14);
+ JJJ(c, d, e, a, b, 0, 14);
+ JJJ(b, c, d, e, a, x[3], 12);
+ JJJ(a, b, c, d, e, 0, 6);
/* parallel round 2 */
- III(e2, a2, b2, c2, d2, x[6], 9);
- III(d2, e2, a2, b2, c2, 0, 13);
- III(c2, d2, e2, a2, b2, x[3], 15);
- III(b2, c2, d2, e2, a2, x[7], 7);
- III(a2, b2, c2, d2, e2, x[0], 12);
- III(e2, a2, b2, c2, d2, 0, 8);
- III(d2, e2, a2, b2, c2, x[5], 9);
- III(c2, d2, e2, a2, b2, 0, 11);
- III(b2, c2, d2, e2, a2, x14, 7);
- III(a2, b2, c2, d2, e2, 0, 7);
- III(e2, a2, b2, c2, d2, x8, 12);
- III(d2, e2, a2, b2, c2, 0, 7);
- III(c2, d2, e2, a2, b2, x[4], 6);
- III(b2, c2, d2, e2, a2, 0, 15);
- III(a2, b2, c2, d2, e2, x[1], 13);
- III(e2, a2, b2, c2, d2, x[2], 11);
+ III(e, a, b, c, d, x[6], 9);
+ III(d, e, a, b, c, 0, 13);
+ III(c, d, e, a, b, x[3], 15);
+ III(b, c, d, e, a, x[7], 7);
+ III(a, b, c, d, e, x[0], 12);
+ III(e, a, b, c, d, 0, 8);
+ III(d, e, a, b, c, x[5], 9);
+ III(c, d, e, a, b, 0, 11);
+ III(b, c, d, e, a, 256, 7);
+ III(a, b, c, d, e, 0, 7);
+ III(e, a, b, c, d, 0x80, 12);
+ III(d, e, a, b, c, 0, 7);
+ III(c, d, e, a, b, x[4], 6);
+ III(b, c, d, e, a, 0, 15);
+ III(a, b, c, d, e, x[1], 13);
+ III(e, a, b, c, d, x[2], 11);
/* parallel round 3 */
- HHH(d2, e2, a2, b2, c2, 0, 9);
- HHH(c2, d2, e2, a2, b2, x[5], 7);
- HHH(b2, c2, d2, e2, a2, x[1], 15);
- HHH(a2, b2, c2, d2, e2, x[3], 11);
- HHH(e2, a2, b2, c2, d2, x[7], 8);
- HHH(d2, e2, a2, b2, c2, x14, 6);
- HHH(c2, d2, e2, a2, b2, x[6], 6);
- HHH(b2, c2, d2, e2, a2, 0, 14);
- HHH(a2, b2, c2, d2, e2, 0, 12);
- HHH(e2, a2, b2, c2, d2, x8, 13);
- HHH(d2, e2, a2, b2, c2, 0, 5);
- HHH(c2, d2, e2, a2, b2, x[2], 14);
- HHH(b2, c2, d2, e2, a2, 0, 13);
- HHH(a2, b2, c2, d2, e2, x[0], 13);
- HHH(e2, a2, b2, c2, d2, x[4], 7);
- HHH(d2, e2, a2, b2, c2, 0, 5);
+ HHH(d, e, a, b, c, 0, 9);
+ HHH(c, d, e, a, b, x[5], 7);
+ HHH(b, c, d, e, a, x[1], 15);
+ HHH(a, b, c, d, e, x[3], 11);
+ HHH(e, a, b, c, d, x[7], 8);
+ HHH(d, e, a, b, c, 256, 6);
+ HHH(c, d, e, a, b, x[6], 6);
+ HHH(b, c, d, e, a, 0, 14);
+ HHH(a, b, c, d, e, 0, 12);
+ HHH(e, a, b, c, d, 0x80, 13);
+ HHH(d, e, a, b, c, 0, 5);
+ HHH(c, d, e, a, b, x[2], 14);
+ HHH(b, c, d, e, a, 0, 13);
+ HHH(a, b, c, d, e, x[0], 13);
+ HHH(e, a, b, c, d, x[4], 7);
+ HHH(d, e, a, b, c, 0, 5);
/* parallel round 4 */
- GGG(c2, d2, e2, a2, b2, x8, 15);
- GGG(b2, c2, d2, e2, a2, x[6], 5);
- GGG(a2, b2, c2, d2, e2, x[4], 8);
- GGG(e2, a2, b2, c2, d2, x[1], 11);
- GGG(d2, e2, a2, b2, c2, x[3], 14);
- GGG(c2, d2, e2, a2, b2, 0, 14);
- GGG(b2, c2, d2, e2, a2, 0, 6);
- GGG(a2, b2, c2, d2, e2, x[0], 14);
- GGG(e2, a2, b2, c2, d2, x[5], 6);
- GGG(d2, e2, a2, b2, c2, 0, 9);
- GGG(c2, d2, e2, a2, b2, x[2], 12);
- GGG(b2, c2, d2, e2, a2, 0, 9);
- GGG(a2, b2, c2, d2, e2, 0, 12);
- GGG(e2, a2, b2, c2, d2, x[7], 5);
- GGG(d2, e2, a2, b2, c2, 0, 15);
- GGG(c2, d2, e2, a2, b2, x14, 8);
+ GGG(c, d, e, a, b, 0x80, 15);
+ GGG(b, c, d, e, a, x[6], 5);
+ GGG(a, b, c, d, e, x[4], 8);
+ GGG(e, a, b, c, d, x[1], 11);
+ GGG(d, e, a, b, c, x[3], 14);
+ GGG(c, d, e, a, b, 0, 14);
+ GGG(b, c, d, e, a, 0, 6);
+ GGG(a, b, c, d, e, x[0], 14);
+ GGG(e, a, b, c, d, x[5], 6);
+ GGG(d, e, a, b, c, 0, 9);
+ GGG(c, d, e, a, b, x[2], 12);
+ GGG(b, c, d, e, a, 0, 9);
+ GGG(a, b, c, d, e, 0, 12);
+ GGG(e, a, b, c, d, x[7], 5);
+ GGG(d, e, a, b, c, 0, 15);
+ GGG(c, d, e, a, b, 256, 8);
/* parallel round 5 */
- FFF(b2, c2, d2, e2, a2, 0, 8);
- FFF(a2, b2, c2, d2, e2, 0, 5);
- FFF(e2, a2, b2, c2, d2, 0, 12);
- FFF(d2, e2, a2, b2, c2, x[4], 9);
- FFF(c2, d2, e2, a2, b2, x[1], 12);
- FFF(b2, c2, d2, e2, a2, x[5], 5);
- FFF(a2, b2, c2, d2, e2, x8, 14);
- FFF(e2, a2, b2, c2, d2, x[7], 6);
- FFF(d2, e2, a2, b2, c2, x[6], 8);
- FFF(c2, d2, e2, a2, b2, x[2], 13);
- FFF(b2, c2, d2, e2, a2, 0, 6);
- FFF(a2, b2, c2, d2, e2, x14, 5);
- FFF(e2, a2, b2, c2, d2, x[0], 15);
- FFF(d2, e2, a2, b2, c2, x[3], 13);
- FFF(c2, d2, e2, a2, b2, 0, 11);
- FFF(b2, c2, d2, e2, a2, 0, 11);
-
- digest[0] = _RIPEMD160_IV[1] + c1 + d2;
- digest[1] = _RIPEMD160_IV[2] + d1 + e2;
- digest[2] = _RIPEMD160_IV[3] + e1 + a2;
- digest[3] = _RIPEMD160_IV[4] + a1 + b2;
- digest[4] = _RIPEMD160_IV[0] + b1 + c2;
+ FFF(b, c, d, e, a, 0, 8);
+ FFF(a, b, c, d, e, 0, 5);
+ FFF(e, a, b, c, d, 0, 12);
+ FFF(d, e, a, b, c, x[4], 9);
+ FFF(c, d, e, a, b, x[1], 12);
+ FFF(b, c, d, e, a, x[5], 5);
+ FFF(a, b, c, d, e, 0x80, 14);
+ FFF(e, a, b, c, d, x[7], 6);
+ FFF(d, e, a, b, c, x[6], 8);
+ FFF(c, d, e, a, b, x[2], 13);
+ FFF(b, c, d, e, a, 0, 6);
+ FFF(a, b, c, d, e, 256, 5);
+ FFF(e, a, b, c, d, x[0], 15);
+ FFF(d, e, a, b, c, x[3], 13);
+ FFF(c, d, e, a, b, 0, 11);
+ FFF(b, c, d, e, a, 0, 11);
+
+ digest[0] = d;
+ digest[1] = e;
+ digest[2] = a;
+ digest[3] = b;
+ digest[4] = c;
}
-
void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5])
{
- unsigned int a1 = _RIPEMD160_IV[0];
- unsigned int b1 = _RIPEMD160_IV[1];
- unsigned int c1 = _RIPEMD160_IV[2];
- unsigned int d1 = _RIPEMD160_IV[3];
- unsigned int e1 = _RIPEMD160_IV[4];
-
- const unsigned int x8 = 0x00000080;
- const unsigned int x14 = 256;
-
- /* round 1 */
- FF(a1, b1, c1, d1, e1, x[0], 11);
- FF(e1, a1, b1, c1, d1, x[1], 14);
- FF(d1, e1, a1, b1, c1, x[2], 15);
- FF(c1, d1, e1, a1, b1, x[3], 12);
- FF(b1, c1, d1, e1, a1, x[4], 5);
- FF(a1, b1, c1, d1, e1, x[5], 8);
- FF(e1, a1, b1, c1, d1, x[6], 7);
- FF(d1, e1, a1, b1, c1, x[7], 9);
- FF(c1, d1, e1, a1, b1, x8, 11);
- FF(b1, c1, d1, e1, a1, 0, 13);
- FF(a1, b1, c1, d1, e1, 0, 14);
- FF(e1, a1, b1, c1, d1, 0, 15);
- FF(d1, e1, a1, b1, c1, 0, 6);
- FF(c1, d1, e1, a1, b1, 0, 7);
- FF(b1, c1, d1, e1, a1, x14, 9);
- FF(a1, b1, c1, d1, e1, 0, 8);
-
- /* round 2 */
- GG(e1, a1, b1, c1, d1, x[7], 7);
- GG(d1, e1, a1, b1, c1, x[4], 6);
- GG(c1, d1, e1, a1, b1, 0, 8);
- GG(b1, c1, d1, e1, a1, x[1], 13);
- GG(a1, b1, c1, d1, e1, 0, 11);
- GG(e1, a1, b1, c1, d1, x[6], 9);
- GG(d1, e1, a1, b1, c1, 0, 7);
- GG(c1, d1, e1, a1, b1, x[3], 15);
- GG(b1, c1, d1, e1, a1, 0, 7);
- GG(a1, b1, c1, d1, e1, x[0], 12);
- GG(e1, a1, b1, c1, d1, 0, 15);
- GG(d1, e1, a1, b1, c1, x[5], 9);
- GG(c1, d1, e1, a1, b1, x[2], 11);
- GG(b1, c1, d1, e1, a1, x14, 7);
- GG(a1, b1, c1, d1, e1, 0, 13);
- GG(e1, a1, b1, c1, d1, x8, 12);
-
- /* round 3 */
- HH(d1, e1, a1, b1, c1, x[3], 11);
- HH(c1, d1, e1, a1, b1, 0, 13);
- HH(b1, c1, d1, e1, a1, x14, 6);
- HH(a1, b1, c1, d1, e1, x[4], 7);
- HH(e1, a1, b1, c1, d1, 0, 14);
- HH(d1, e1, a1, b1, c1, 0, 9);
- HH(c1, d1, e1, a1, b1, x8, 13);
- HH(b1, c1, d1, e1, a1, x[1], 15);
- HH(a1, b1, c1, d1, e1, x[2], 14);
- HH(e1, a1, b1, c1, d1, x[7], 8);
- HH(d1, e1, a1, b1, c1, x[0], 13);
- HH(c1, d1, e1, a1, b1, x[6], 6);
- HH(b1, c1, d1, e1, a1, 0, 5);
- HH(a1, b1, c1, d1, e1, 0, 12);
- HH(e1, a1, b1, c1, d1, x[5], 7);
- HH(d1, e1, a1, b1, c1, 0, 5);
-
- /* round 4 */
- II(c1, d1, e1, a1, b1, x[1], 11);
- II(b1, c1, d1, e1, a1, 0, 12);
- II(a1, b1, c1, d1, e1, 0, 14);
- II(e1, a1, b1, c1, d1, 0, 15);
- II(d1, e1, a1, b1, c1, x[0], 14);
- II(c1, d1, e1, a1, b1, x8, 15);
- II(b1, c1, d1, e1, a1, 0, 9);
- II(a1, b1, c1, d1, e1, x[4], 8);
- II(e1, a1, b1, c1, d1, 0, 9);
- II(d1, e1, a1, b1, c1, x[3], 14);
- II(c1, d1, e1, a1, b1, x[7], 5);
- II(b1, c1, d1, e1, a1, 0, 6);
- II(a1, b1, c1, d1, e1, x14, 8);
- II(e1, a1, b1, c1, d1, x[5], 6);
- II(d1, e1, a1, b1, c1, x[6], 5);
- II(c1, d1, e1, a1, b1, x[2], 12);
-
- /* round 5 */
- JJ(b1, c1, d1, e1, a1, x[4], 9);
- JJ(a1, b1, c1, d1, e1, x[0], 15);
- JJ(e1, a1, b1, c1, d1, x[5], 5);
- JJ(d1, e1, a1, b1, c1, 0, 11);
- JJ(c1, d1, e1, a1, b1, x[7], 6);
- JJ(b1, c1, d1, e1, a1, 0, 8);
- JJ(a1, b1, c1, d1, e1, x[2], 13);
- JJ(e1, a1, b1, c1, d1, 0, 12);
- JJ(d1, e1, a1, b1, c1, x14, 5);
- JJ(c1, d1, e1, a1, b1, x[1], 12);
- JJ(b1, c1, d1, e1, a1, x[3], 13);
- JJ(a1, b1, c1, d1, e1, x8, 14);
- JJ(e1, a1, b1, c1, d1, 0, 11);
- JJ(d1, e1, a1, b1, c1, x[6], 8);
- JJ(c1, d1, e1, a1, b1, 0, 5);
- JJ(b1, c1, d1, e1, a1, 0, 6);
-
- unsigned int a2 = _RIPEMD160_IV[0];
- unsigned int b2 = _RIPEMD160_IV[1];
- unsigned int c2 = _RIPEMD160_IV[2];
- unsigned int d2 = _RIPEMD160_IV[3];
- unsigned int e2 = _RIPEMD160_IV[4];
-
- /* parallel round 1 */
- JJJ(a2, b2, c2, d2, e2, x[5], 8);
- JJJ(e2, a2, b2, c2, d2, x14, 9);
- JJJ(d2, e2, a2, b2, c2, x[7], 9);
- JJJ(c2, d2, e2, a2, b2, x[0], 11);
- JJJ(b2, c2, d2, e2, a2, 0, 13);
- JJJ(a2, b2, c2, d2, e2, x[2], 15);
- JJJ(e2, a2, b2, c2, d2, 0, 15);
- JJJ(d2, e2, a2, b2, c2, x[4], 5);
- JJJ(c2, d2, e2, a2, b2, 0, 7);
- JJJ(b2, c2, d2, e2, a2, x[6], 7);
- JJJ(a2, b2, c2, d2, e2, 0, 8);
- JJJ(e2, a2, b2, c2, d2, x8, 11);
- JJJ(d2, e2, a2, b2, c2, x[1], 14);
- JJJ(c2, d2, e2, a2, b2, 0, 14);
- JJJ(b2, c2, d2, e2, a2, x[3], 12);
- JJJ(a2, b2, c2, d2, e2, 0, 6);
+ __private unsigned int digest1[5];
+ __private unsigned int digest2[5];
- /* parallel round 2 */
- III(e2, a2, b2, c2, d2, x[6], 9);
- III(d2, e2, a2, b2, c2, 0, 13);
- III(c2, d2, e2, a2, b2, x[3], 15);
- III(b2, c2, d2, e2, a2, x[7], 7);
- III(a2, b2, c2, d2, e2, x[0], 12);
- III(e2, a2, b2, c2, d2, 0, 8);
- III(d2, e2, a2, b2, c2, x[5], 9);
- III(c2, d2, e2, a2, b2, 0, 11);
- III(b2, c2, d2, e2, a2, x14, 7);
- III(a2, b2, c2, d2, e2, 0, 7);
- III(e2, a2, b2, c2, d2, x8, 12);
- III(d2, e2, a2, b2, c2, 0, 7);
- III(c2, d2, e2, a2, b2, x[4], 6);
- III(b2, c2, d2, e2, a2, 0, 15);
- III(a2, b2, c2, d2, e2, x[1], 13);
- III(e2, a2, b2, c2, d2, x[2], 11);
+ ripemd160p1(x, digest1);
+ ripemd160p2(x, digest2);
- /* parallel round 3 */
- HHH(d2, e2, a2, b2, c2, 0, 9);
- HHH(c2, d2, e2, a2, b2, x[5], 7);
- HHH(b2, c2, d2, e2, a2, x[1], 15);
- HHH(a2, b2, c2, d2, e2, x[3], 11);
- HHH(e2, a2, b2, c2, d2, x[7], 8);
- HHH(d2, e2, a2, b2, c2, x14, 6);
- HHH(c2, d2, e2, a2, b2, x[6], 6);
- HHH(b2, c2, d2, e2, a2, 0, 14);
- HHH(a2, b2, c2, d2, e2, 0, 12);
- HHH(e2, a2, b2, c2, d2, x8, 13);
- HHH(d2, e2, a2, b2, c2, 0, 5);
- HHH(c2, d2, e2, a2, b2, x[2], 14);
- HHH(b2, c2, d2, e2, a2, 0, 13);
- HHH(a2, b2, c2, d2, e2, x[0], 13);
- HHH(e2, a2, b2, c2, d2, x[4], 7);
- HHH(d2, e2, a2, b2, c2, 0, 5);
-
- /* parallel round 4 */
- GGG(c2, d2, e2, a2, b2, x8, 15);
- GGG(b2, c2, d2, e2, a2, x[6], 5);
- GGG(a2, b2, c2, d2, e2, x[4], 8);
- GGG(e2, a2, b2, c2, d2, x[1], 11);
- GGG(d2, e2, a2, b2, c2, x[3], 14);
- GGG(c2, d2, e2, a2, b2, 0, 14);
- GGG(b2, c2, d2, e2, a2, 0, 6);
- GGG(a2, b2, c2, d2, e2, x[0], 14);
- GGG(e2, a2, b2, c2, d2, x[5], 6);
- GGG(d2, e2, a2, b2, c2, 0, 9);
- GGG(c2, d2, e2, a2, b2, x[2], 12);
- GGG(b2, c2, d2, e2, a2, 0, 9);
- GGG(a2, b2, c2, d2, e2, 0, 12);
- GGG(e2, a2, b2, c2, d2, x[7], 5);
- GGG(d2, e2, a2, b2, c2, 0, 15);
- GGG(c2, d2, e2, a2, b2, x14, 8);
+ digest[0] = digest1[0] + digest2[0];
+ digest[1] = digest1[1] + digest2[1];
+ digest[2] = digest1[2] + digest2[2];
+ digest[3] = digest1[3] + digest2[3];
+ digest[4] = digest1[4] + digest2[4];
+}
- /* parallel round 5 */
- FFF(b2, c2, d2, e2, a2, 0, 8);
- FFF(a2, b2, c2, d2, e2, 0, 5);
- FFF(e2, a2, b2, c2, d2, 0, 12);
- FFF(d2, e2, a2, b2, c2, x[4], 9);
- FFF(c2, d2, e2, a2, b2, x[1], 12);
- FFF(b2, c2, d2, e2, a2, x[5], 5);
- FFF(a2, b2, c2, d2, e2, x8, 14);
- FFF(e2, a2, b2, c2, d2, x[7], 6);
- FFF(d2, e2, a2, b2, c2, x[6], 8);
- FFF(c2, d2, e2, a2, b2, x[2], 13);
- FFF(b2, c2, d2, e2, a2, 0, 6);
- FFF(a2, b2, c2, d2, e2, x14, 5);
- FFF(e2, a2, b2, c2, d2, x[0], 15);
- FFF(d2, e2, a2, b2, c2, x[3], 13);
- FFF(c2, d2, e2, a2, b2, 0, 11);
- FFF(b2, c2, d2, e2, a2, 0, 11);
-
- digest[0] = c1 + d2;
- digest[1] = d1 + e2;
- digest[2] = e1 + a2;
- digest[3] = a1 + b2;
- digest[4] = b1 + c2;
+void ripemd160FinalRound(const unsigned int hIn[5], unsigned int hOut[5])
+{
+ hOut[0] = endian(hIn[0] + RIPEMD160_IV[1]);
+ hOut[1] = endian(hIn[1] + RIPEMD160_IV[2]);
+ hOut[2] = endian(hIn[2] + RIPEMD160_IV[3]);
+ hOut[3] = endian(hIn[3] + RIPEMD160_IV[4]);
+ hOut[4] = endian(hIn[4] + RIPEMD160_IV[0]);
}
+
#endif
diff --git a/clMath/secp256k1.cl b/clMath/secp256k1.cl
index 2a63f3f..95c5556 100644
--- a/clMath/secp256k1.cl
+++ b/clMath/secp256k1.cl
@@ -1,91 +1,41 @@
-#ifndef _SECP256K1_CL
-#define _SECP256K1_CL
-
-typedef ulong uint64_t;
-
-typedef struct {
- uint v[8];
-}uint256_t;
+#ifndef SECP256K1_CL
+#define SECP256K1_CL
+typedef struct uint256_t {
+ unsigned int v[8];
+} uint256_t;
/**
- Prime modulus 2^256 - 2^32 - 977
+ * Base point X
*/
-__constant unsigned int _P[8] = {
- 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFC2F
-};
-
-__constant unsigned int _P_MINUS1[8] = {
- 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFC2F
-};
-
-/**
- Base point X
- */
-__constant unsigned int _GX[8] = {
+__constant unsigned int GX[8] = {
0x79BE667E, 0xF9DCBBAC, 0x55A06295, 0xCE870B07, 0x029BFCDB, 0x2DCE28D9, 0x59F2815B, 0x16F81798
};
/**
- Base point Y
+ * Base point Y
*/
-__constant unsigned int _GY[8] = {
+__constant unsigned int GY[8] = {
0x483ADA77, 0x26A3C465, 0x5DA4FBFC, 0x0E1108A8, 0xFD17B448, 0xA6855419, 0x9C47D08F, 0xFB10D4B8
};
-
/**
* Group order
*/
-__constant unsigned int _N[8] = {
+__constant unsigned int N[8] = {
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xBAAEDCE6, 0xAF48A03B, 0xBFD25E8C, 0xD0364141
};
-__constant unsigned int _INFINITY[8] = {
- 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
+/**
+ * Prime modulus 2^256 - 2^32 - 977
+ */
+__constant unsigned int P[8] = {
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFC2F
};
-void printBigInt(const unsigned int x[8])
-{
- printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
- x[0], x[1], x[2], x[3],
- x[4], x[5], x[6], x[7]);
-}
-
-// Add with carry
-unsigned int addc(unsigned int a, unsigned int b, unsigned int *carry)
-{
- unsigned int sum = a + *carry;
-
- unsigned int c1 = (sum < a) ? 1 : 0;
-
- sum = sum + b;
-
- unsigned int c2 = (sum < b) ? 1 : 0;
-
- *carry = c1 | c2;
-
- return sum;
-}
-
-// Subtract with borrow
-unsigned int subc(unsigned int a, unsigned int b, unsigned int *borrow)
-{
- unsigned int diff = a - *borrow;
-
- *borrow = (diff > a) ? 1 : 0;
-
- unsigned int diff2 = diff - b;
-
- *borrow |= (diff2 > diff) ? 1 : 0;
-
- return diff2;
-}
-
#ifdef DEVICE_VENDOR_INTEL
-
// Intel devices have a mul_hi bug
-unsigned int mul_hi977(unsigned int x)
+inline unsigned int mul_hi977(unsigned int x)
{
unsigned int high = x >> 16;
unsigned int low = x & 0xffff;
@@ -94,657 +44,668 @@ unsigned int mul_hi977(unsigned int x)
}
// 32 x 32 multiply-add
-void madd977(unsigned int *high, unsigned int *low, unsigned int a, unsigned int c)
+inline void madd977(unsigned int *high, unsigned int *low, unsigned int *a, unsigned int *c)
{
- *low = a * 977;
- unsigned int tmp = *low + c;
+ *low = *a * 977;
+ unsigned int tmp = *low + *c;
unsigned int carry = tmp < *low ? 1 : 0;
*low = tmp;
- *high = mul_hi977(a) + carry;
+ *high = mul_hi977(*a) + carry;
}
-
#else
-// 32 x 32 multiply-add
-void madd977(unsigned int *high, unsigned int *low, unsigned int a, unsigned int c)
+inline void madd977(unsigned int *high, unsigned int *low, unsigned int *a, unsigned int *c)
{
- *low = a * 977;
- unsigned int tmp = *low + c;
+ *low = *a * 977;
+ unsigned int tmp = *low + *c;
unsigned int carry = tmp < *low ? 1 : 0;
*low = tmp;
- *high = mad_hi(a, (unsigned int)977, carry);
+ *high = mad_hi(*a, (unsigned int)977, carry);
}
#endif
-// 32 x 32 multiply-add
-void madd(unsigned int *high, unsigned int *low, unsigned int a, unsigned int b, unsigned int c)
-{
- *low = a * b;
- unsigned int tmp = *low + c;
- unsigned int carry = tmp < *low ? 1 : 0;
- *low = tmp;
- *high = mad_hi(a, b, carry);
-}
-
-void mull(unsigned int *high, unsigned int *low, unsigned int a, unsigned int b)
-{
- *low = a * b;
- *high = mul_hi(a, b);
-}
-
-
-uint256_t sub256k(uint256_t a, uint256_t b, unsigned int* borrow_ptr)
-{
- unsigned int borrow = 0;
- uint256_t c;
-
- for(int i = 7; i >= 0; i--) {
- c.v[i] = subc(a.v[i], b.v[i], &borrow);
- }
-
- *borrow_ptr = borrow;
-
- return c;
-}
-
-bool greaterThanEqualToP(const unsigned int a[8])
-{
- for(int i = 0; i < 8; i++) {
- if(a[i] > _P_MINUS1[i]) {
- return true;
- } else if(a[i] < _P_MINUS1[i]) {
- return false;
- }
- }
-
- return true;
-}
+// Add with carry: sum = a + b + carry (carry-in), carry = carry-out (0 or 1). tmp must be an unsigned int scratch word. sum is stored last, so it may safely name the same lvalue as a or b.
+#define addc(a, b, sum, carry, tmp) \
+ tmp = (a) + (carry); \
+ carry = ((tmp) < (a)) * 1; \
+ carry |= (((tmp) + (b)) < (b)) * 1; \
+ sum = (tmp) + (b);
+
+// Subtract with borrow: diff = a - b - borrow (borrow-in); borrow is left holding the borrow-out (0 or 1). tmp is a caller-supplied scratch word that receives a - borrow-in.
+#define subc(a, b, diff, borrow, tmp) \
+ tmp = (a) - (borrow); \
+ borrow = ((tmp) > (a)) * 1; \
+ diff = (tmp) - (b); \
+ borrow |= ((diff) > (tmp)) ? 1 : 0;
+// 8-word add: c = a + b, chained through addc from index 7 (processed first) to index 0 (processed last). carry is the carry-in on entry and the carry out of word 0 on exit; tmp is scratch.
+#define add256k(a, b, c, carry, tmp) \
+ addc(a[7], b[7], c[7], carry, tmp); \
+ addc(a[6], b[6], c[6], carry, tmp); \
+ addc(a[5], b[5], c[5], carry, tmp); \
+ addc(a[4], b[4], c[4], carry, tmp); \
+ addc(a[3], b[3], c[3], carry, tmp); \
+ addc(a[2], b[2], c[2], carry, tmp); \
+ addc(a[1], b[1], c[1], carry, tmp); \
+ addc(a[0], b[0], c[0], carry, tmp);
+// 8-word subtract: c = a - b, chained through subc from index 7 (processed first) to index 0 (processed last). borrow is the borrow-in on entry and the borrow out of word 0 on exit; tmp is scratch.
+#define sub256k( a, b, c, borrow, tmp) \
+ subc(a[7], b[7], c[7], borrow, tmp); \
+ subc(a[6], b[6], c[6], borrow, tmp); \
+ subc(a[5], b[5], c[5], borrow, tmp); \
+ subc(a[4], b[4], c[4], borrow, tmp); \
+ subc(a[3], b[3], c[3], borrow, tmp); \
+ subc(a[2], b[2], c[2], borrow, tmp); \
+ subc(a[1], b[1], c[1], borrow, tmp); \
+ subc(a[0], b[0], c[0], borrow, tmp);
+// Evaluates to true iff all eight words of a are 0xffffffff (the all-ones pattern this code tests for as "infinity"). The argument is parenthesised so any array or pointer expression can be passed.
+#define isInfinity256k(a) \
+ ( \
+ ((a)[0] == 0xffffffff) && \
+ ((a)[1] == 0xffffffff) && \
+ ((a)[2] == 0xffffffff) && \
+ ((a)[3] == 0xffffffff) && \
+ ((a)[4] == 0xffffffff) && \
+ ((a)[5] == 0xffffffff) && \
+ ((a)[6] == 0xffffffff) && \
+ ((a)[7] == 0xffffffff) \
+ )
+// True iff the 8-word value a (index 0 most significant) is >= P. Relies on P[0..5] all being 0xffffffff, so a can only reach P when its top six words equal P's; the low two words are then compared as a pair.
+#define greaterOrEqualToP(a) \
+ ((a)[0] == P[0] && (a)[1] == P[1] && (a)[2] == P[2] && (a)[3] == P[3] && (a)[4] == P[4] && (a)[5] == P[5] && ((a)[6] > P[6] || ((a)[6] == P[6] && (a)[7] >= P[7])))
+// Evaluates to true iff the eight words of a and b are pairwise equal. Both arguments are parenthesised so any array or pointer expression can be passed.
+#define equal256k(a, b) \
+ ( \
+ ((a)[0] == (b)[0]) && \
+ ((a)[1] == (b)[1]) && \
+ ((a)[2] == (b)[2]) && \
+ ((a)[3] == (b)[3]) && \
+ ((a)[4] == (b)[4]) && \
+ ((a)[5] == (b)[5]) && \
+ ((a)[6] == (b)[6]) && \
+ ((a)[7] == (b)[7]) \
+ )
void multiply256(const unsigned int x[8], const unsigned int y[8], unsigned int out_high[8], unsigned int out_low[8])
{
- unsigned int z[16];
-
- unsigned int high = 0;
+ __private unsigned long product;
// First round, overwrite z
- for(int j = 7; j >= 0; j--) {
-
- uint64_t product = (uint64_t)x[7] * y[j];
-
- product = product + high;
-
- z[7 + j + 1] = (unsigned int)product;
- high = (unsigned int)(product >> 32);
- }
- z[7] = high;
-
- for(int i = 6; i >= 0; i--) {
-
- high = 0;
-
- for(int j = 7; j >= 0; j--) {
-
- uint64_t product = (uint64_t)x[i] * y[j];
-
- product = product + z[i + j + 1] + high;
+ product = (unsigned long)x[7] * y[7];
+ out_low[7] = (unsigned int)product;
+
+ product = (unsigned long)x[7] * y[6] + (unsigned int)(product >> 32);
+ out_low[6] = (unsigned int)product;
+
+ product = (unsigned long)x[7] * y[5] + (unsigned int)(product >> 32);
+ out_low[5] = (unsigned int)product;
+
+ product = (unsigned long)x[7] * y[4] + (unsigned int)(product >> 32);
+ out_low[4] = (unsigned int)product;
+
+ product = (unsigned long)x[7] * y[3] + (unsigned int)(product >> 32);
+ out_low[3] = (unsigned int)product;
+
+ product = (unsigned long)x[7] * y[2] + (unsigned int)(product >> 32);
+ out_low[2] = (unsigned int)product;
+
+ product = (unsigned long)x[7] * y[1] + (unsigned int)(product >> 32);
+ out_low[1] = (unsigned int)product;
+
+ product = (unsigned long)x[7] * y[0] + (unsigned int)(product >> 32);
+ out_low[0] = (unsigned int)product;
+ out_high[7] = (unsigned int)(product >> 32);
+
+ product = (unsigned long)x[6] * y[7] + out_low[6];
+ out_low[6] = (unsigned int)product;
+
+ /** round6 */
+ product = (unsigned long)x[6] * y[6] + out_low[5] + (product >> 32);
+ out_low[5] = (unsigned int)product;
+
+ product = (unsigned long)x[6] * y[5] + out_low[4] + (product >> 32);
+ out_low[4] = (unsigned int)product;
+
+ product = (unsigned long)x[6] * y[4] + out_low[3] + (product >> 32);
+ out_low[3] = (unsigned int)product;
+
+ product = (unsigned long)x[6] * y[3] + out_low[2] + (product >> 32);
+ out_low[2] = (unsigned int)product;
+
+ product = (unsigned long)x[6] * y[2] + out_low[1] + (product >> 32);
+ out_low[1] = (unsigned int)product;
+
+ product = (unsigned long)x[6] * y[1] + out_low[0] + (product >> 32);
+ out_low[0] = (unsigned int)product;
+
+ product = (unsigned long)x[6] * y[0] + out_high[7] + (product >> 32);
+ out_high[7] = (unsigned int)product;
+ out_high[6] = product >> 32;
- z[i + j + 1] = (unsigned int)product;
+ /** round 5 */
+ product = (unsigned long)x[5] * y[7] + out_low[5];
+ out_low[5] = (unsigned int)product;
- high = product >> 32;
- }
+ product = (unsigned long)x[5] * y[6] + out_low[4] + (product >> 32);
+ out_low[4] = (unsigned int)product;
- z[i] = high;
- }
+ product = (unsigned long)x[5] * y[5] + out_low[3] + (product >> 32);
+ out_low[3] = (unsigned int)product;
- for(int i = 0; i < 8; i++) {
- out_high[i] = z[i];
- out_low[i] = z[8 + i];
- }
-}
+ product = (unsigned long)x[5] * y[4] + out_low[2] + (product >> 32);
+ out_low[2] = (unsigned int)product;
+ product = (unsigned long)x[5] * y[3] + out_low[1] + (product >> 32);
+ out_low[1] = (unsigned int)product;
-unsigned int add256(const unsigned int a[8], const unsigned int b[8], unsigned int c[8])
-{
- unsigned int carry = 0;
-
- for(int i = 7; i >= 0; i--) {
- c[i] = addc(a[i], b[i], &carry);
- }
-
- return carry;
-}
-
-uint256_t add256k(uint256_t a, uint256_t b, unsigned int* carry_ptr)
-{
- uint256_t c;
- unsigned int carry = 0;
+ product = (unsigned long)x[5] * y[2] + out_low[0] + (product >> 32);
+ out_low[0] = (unsigned int)product;
+
+ product = (unsigned long)x[5] * y[1] + out_high[7] + (product >> 32);
+ out_high[7] = (unsigned int)product;
+
+ product = (unsigned long)x[5] * y[0] + out_high[6] + (product >> 32);
+ out_high[6] = (unsigned int)product;
+ out_high[5] = product >> 32;
- for(int i = 7; i >= 0; i--) {
- c.v[i] = addc(a.v[i], b.v[i], &carry);
- }
+ /** round 4 */
+ product = (unsigned long)x[4] * y[7] + out_low[4];
+ out_low[4] = (unsigned int)product;
- *carry_ptr = carry;
+ product = (unsigned long)x[4] * y[6] + out_low[3] + (product >> 32);
+ out_low[3] = (unsigned int)product;
- return c;
-}
+ product = (unsigned long)x[4] * y[5] + out_low[2] + (product >> 32);
+ out_low[2] = (unsigned int)product;
-bool isInfinity(const unsigned int x[8])
-{
- bool isf = true;
+ product = (unsigned long)x[4] * y[4] + out_low[1] + (product >> 32);
+ out_low[1] = (unsigned int)product;
- for(int i = 0; i < 8; i++) {
- if(x[i] != 0xffffffff) {
- isf = false;
- }
- }
+ product = (unsigned long)x[4] * y[3] + out_low[0] + (product >> 32);
+ out_low[0] = (unsigned int)product;
- return isf;
-}
-
-bool isInfinity256k(const uint256_t x)
-{
- bool isf = true;
+ product = (unsigned long)x[4] * y[2] + out_high[7] + (product >> 32);
+ out_high[7] = (unsigned int)product;
+
+ product = (unsigned long)x[4] * y[1] + out_high[6] + (product >> 32);
+ out_high[6] = (unsigned int)product;
+
+ product = (unsigned long)x[4] * y[0] + out_high[5] + (product >> 32);
+ out_high[5] = (unsigned int)product;
+ out_high[4] = product >> 32;
- for(int i = 0; i < 8; i++) {
- if(x.v[i] != 0xffffffff) {
- isf = false;
- }
- }
+ /** round 3 */
+ product = (unsigned long)x[3] * y[7] + out_low[3];
+ out_low[3] = (unsigned int)product;
- return isf;
-}
+ product = (unsigned long)x[3] * y[6] + out_low[2] + (product >> 32);
+ out_low[2] = (unsigned int)product;
-bool equal(const unsigned int a[8], const unsigned int b[8])
-{
- for(int i = 0; i < 8; i++) {
- if(a[i] != b[i]) {
- return false;
- }
- }
+ product = (unsigned long)x[3] * y[5] + out_low[1] + (product >> 32);
+ out_low[1] = (unsigned int)product;
- return true;
-}
+ product = (unsigned long)x[3] * y[4] + out_low[0] + (product >> 32);
+ out_low[0] = (unsigned int)product;
-bool equal256k(uint256_t a, uint256_t b)
-{
- for(int i = 0; i < 8; i++) {
- if(a.v[i] != b.v[i]) {
- return false;
- }
- }
+ product = (unsigned long)x[3] * y[3] + out_high[7] + (product >> 32);
+ out_high[7] = (unsigned int)product;
- return true;
-}
+ product = (unsigned long)x[3] * y[2] + out_high[6] + (product >> 32);
+ out_high[6] = (unsigned int)product;
+
+ product = (unsigned long)x[3] * y[1] + out_high[5] + (product >> 32);
+ out_high[5] = (unsigned int)product;
+
+ product = (unsigned long)x[3] * y[0] + out_high[4] + (product >> 32);
+ out_high[4] = (unsigned int)product;
+ out_high[3] = product >> 32;
-inline uint256_t readInt256(__global const uint256_t* ara, int idx)
-{
- return ara[idx];
-}
+ /** round 2 */
+ product = (unsigned long)x[2] * y[7] + out_low[2];
+ out_low[2] = (unsigned int)product;
-/*
- * Read least-significant word
- */
-unsigned int readLSW(__global const unsigned int *ara, int idx)
-{
- return ara[idx * 8 + 7];
-}
+ product = (unsigned long)x[2] * y[6] + out_low[1] + (product >> 32);
+ out_low[1] = (unsigned int)product;
-unsigned int readLSW256k(__global const uint256_t* ara, int idx)
-{
- return ara[idx].v[7];
-}
+ product = (unsigned long)x[2] * y[5] + out_low[0] + (product >> 32);
+ out_low[0] = (unsigned int)product;
-unsigned int readWord256k(__global const uint256_t* ara, int idx, int word)
-{
- return ara[idx].v[word];
-}
+ product = (unsigned long)x[2] * y[4] + out_high[7] + (product >> 32);
+ out_high[7] = (unsigned int)product;
-unsigned int addP(const unsigned int a[8], unsigned int c[8])
-{
- unsigned int carry = 0;
+ product = (unsigned long)x[2] * y[3] + out_high[6] + (product >> 32);
+ out_high[6] = (unsigned int)product;
- for(int i = 7; i >= 0; i--) {
- c[i] = addc(a[i], _P[i], &carry);
- }
+ product = (unsigned long)x[2] * y[2] + out_high[5] + (product >> 32);
+ out_high[5] = (unsigned int)product;
+
+ product = (unsigned long)x[2] * y[1] + out_high[4] + (product >> 32);
+ out_high[4] = (unsigned int)product;
+
+ product = (unsigned long)x[2] * y[0] + out_high[3] + (product >> 32);
+ out_high[3] = (unsigned int)product;
+ out_high[2] = product >> 32;
+
+ /** round 1 */
+ product = (unsigned long)x[1] * y[7] + out_low[1];
+ out_low[1] = (unsigned int)product;
- return carry;
-}
+ product = (unsigned long)x[1] * y[6] + out_low[0] + (product >> 32);
+ out_low[0] = (unsigned int)product;
-unsigned int subP(const unsigned int a[8], unsigned int c[8])
-{
- unsigned int borrow = 0;
- for(int i = 7; i >= 0; i--) {
- c[i] = subc(a[i], _P[i], &borrow);
- }
+ product = (unsigned long)x[1] * y[5] + out_high[7] + (product >> 32);
+ out_high[7] = (unsigned int)product;
- return borrow;
-}
+ product = (unsigned long)x[1] * y[4] + out_high[6] + (product >> 32);
+ out_high[6] = (unsigned int)product;
-/**
- * Subtraction mod p
- */
-uint256_t subModP256k(uint256_t a, uint256_t b)
-{
- unsigned int borrow = 0;
- uint256_t c = sub256k(a, b, &borrow);
- if(borrow) {
- addP(c.v, c.v);
- }
+ product = (unsigned long)x[1] * y[3] + out_high[5] + (product >> 32);
+ out_high[5] = (unsigned int)product;
- return c;
-}
+ product = (unsigned long)x[1] * y[2] + out_high[4] + (product >> 32);
+ out_high[4] = (unsigned int)product;
+
+ product = (unsigned long)x[1] * y[1] + out_high[3] + (product >> 32);
+ out_high[3] = (unsigned int)product;
+
+ product = (unsigned long)x[1] * y[0] + out_high[2] + (product >> 32);
+ out_high[2] = (unsigned int)product;
+ out_high[1] = product >> 32;
+ /** round 0 */
+ product = (unsigned long)x[0] * y[7] + out_low[0];
+ out_low[0] = (unsigned int)product;
-uint256_t addModP256k(uint256_t a, uint256_t b)
-{
- unsigned int carry = 0;
+ product = (unsigned long)x[0] * y[6] + out_high[7] + (product >> 32);
+ out_high[7] = (unsigned int)product;
- uint256_t c = add256k(a, b, &carry);
+ product = (unsigned long)x[0] * y[5] + out_high[6] + (product >> 32);
+ out_high[6] = (unsigned int)product;
- bool gt = false;
- for(int i = 0; i < 8; i++) {
- if(c.v[i] > _P[i]) {
- gt = true;
- break;
- } else if(c.v[i] < _P[i]) {
- break;
- }
- }
+ product = (unsigned long)x[0] * y[4] + out_high[5] + (product >> 32);
+ out_high[5] = (unsigned int)product;
- if(carry || gt) {
- subP(c.v, c.v);
- }
+ product = (unsigned long)x[0] * y[3] + out_high[4] + (product >> 32);
+ out_high[4] = (unsigned int)product;
- return c;
+ product = (unsigned long)x[0] * y[2] + out_high[3] + (product >> 32);
+ out_high[3] = (unsigned int)product;
+
+ product = (unsigned long)x[0] * y[1] + out_high[2] + (product >> 32);
+ out_high[2] = (unsigned int)product;
+
+ product = (unsigned long)x[0] * y[0] + out_high[1] + (product >> 32);
+ out_high[1] = (unsigned int)product;
+ out_high[0] = product >> 32;
}
-
-void mulModP(const unsigned int a[8], const unsigned int b[8], unsigned int product_low[8])
+void mulModP(unsigned int a[8], unsigned int b[8], unsigned int product_low[8])
{
- unsigned int high[8];
+ __private unsigned int high[8];
+ __private unsigned int low[8];
- unsigned int hWord = 0;
- unsigned int carry = 0;
+ __private unsigned int hWord = 0;
+ __private unsigned int carry = 0;
+ __private unsigned int t = 0;
+ __private unsigned int product6 = 0;
+ __private unsigned int product7 = 0;
+ __private unsigned int tmp;
// 256 x 256 multiply
- multiply256(a, b, high, product_low);
+ multiply256(a, b, high, low);
+ product_low[7] = low[7];
+ product_low[6] = low[6];
+ product_low[5] = low[5];
+ product_low[4] = low[4];
+ product_low[3] = low[3];
+ product_low[2] = low[2];
+ product_low[1] = low[1];
+ product_low[0] = low[0];
// Add 2^32 * high to the low 256 bits (shift left 1 word and add)
// Affects product[14] to product[6]
- for(int i = 6; i >= 0; i--) {
- product_low[i] = addc(product_low[i], high[i + 1], &carry);
- }
- unsigned int product7 = addc(high[0], 0, &carry);
- unsigned int product6 = carry;
+ addc(product_low[6], high[7], product_low[6], carry, tmp);
+ addc(product_low[5], high[6], product_low[5], carry, tmp);
+ addc(product_low[4], high[5], product_low[4], carry, tmp);
+ addc(product_low[3], high[4], product_low[3], carry, tmp);
+ addc(product_low[2], high[3], product_low[2], carry, tmp);
+ addc(product_low[1], high[2], product_low[1], carry, tmp);
+ addc(product_low[0], high[1], product_low[0], carry, tmp);
+
+ addc(high[0], 0, product7, carry, tmp);
+ product6 = carry;
carry = 0;
// Multiply high by 977 and add to low
// Affects product[15] to product[5]
for(int i = 7; i >= 0; i--) {
- unsigned int t = 0;
- madd977(&hWord, &t, high[i], hWord);
- product_low[i] = addc(product_low[i], t, &carry);
+ madd977(&hWord, &t, &high[i], &hWord);
+ addc(product_low[i], t, product_low[i], carry, tmp);
+ t = 0;
}
- product7 = addc(product7, hWord, &carry);
- product6 = addc(product6, 0, &carry);
+ addc(product7, hWord, high[7], carry, tmp);
+ addc(product6, 0, high[6], carry, tmp);
// Multiply high 2 words by 2^32 and add to low
// Affects product[14] to product[7]
carry = 0;
- high[7] = product7;
- high[6] = product6;
- product7 = 0;
- product6 = 0;
+ addc(product_low[6], high[7], product_low[6], carry, tmp);
+ addc(product_low[5], high[6], product_low[5], carry, tmp);
- product_low[6] = addc(product_low[6], high[7], &carry);
- product_low[5] = addc(product_low[5], high[6], &carry);
-
- // Propagate the carry
- for(int i = 4; i >= 0; i--) {
- product_low[i] = addc(product_low[i], 0, &carry);
- }
- product7 = carry;
+ addc(product_low[4], 0, product_low[4], carry, tmp);
+ addc(product_low[3], 0, product_low[3], carry, tmp);
+ addc(product_low[2], 0, product_low[2], carry, tmp);
+ addc(product_low[1], 0, product_low[1], carry, tmp);
+ addc(product_low[0], 0, product_low[0], carry, tmp);
// Multiply top 2 words by 977 and add to low
// Affects product[15] to product[7]
carry = 0;
hWord = 0;
- unsigned int t = 0;
- madd977(&hWord, &t, high[7], hWord);
- product_low[7] = addc(product_low[7], t, &carry);
- madd977(&hWord, &t, high[6], hWord);
- product_low[6] = addc(product_low[6], t, &carry);
- product_low[5] = addc(product_low[5], hWord, &carry);
-
+ madd977(&hWord, &t, &high[7], &hWord);
+ addc(product_low[7], t, product_low[7], carry, tmp);
+ madd977(&hWord, &t, &high[6], &hWord);
+ addc(product_low[6], t, product_low[6], carry, tmp);
+ addc(product_low[5], hWord, product_low[5], carry, tmp);
// Propagate carry
- for(int i = 4; i >= 0; i--) {
- product_low[i] = addc(product_low[i], 0, &carry);
- }
- product7 = carry;
+ addc(product_low[4], 0, product_low[4], carry, tmp);
+ addc(product_low[3], 0, product_low[3], carry, tmp);
+ addc(product_low[2], 0, product_low[2], carry, tmp);
+ addc(product_low[1], 0, product_low[1], carry, tmp);
+ addc(product_low[0], 0, product_low[0], carry, tmp);
// Reduce if >= P
- if(product7 || greaterThanEqualToP(product_low)) {
- subP(product_low, product_low);
+ if(carry || greaterOrEqualToP(product_low)) {
+ carry = 0;
+ sub256k(product_low, P, product_low, carry, tmp);
}
}
-uint256_t mulModP256k(uint256_t a, uint256_t b)
+/**
+ * Subtraction mod p: writes a - b into c, adding P back when the 8-word subtraction borrows out of the top word.
+ */
+void subModP256k(unsigned int a[8], unsigned int b[8], unsigned int c[8])
{
- uint256_t c;
+ __private unsigned int borrow = 0;
+ __private unsigned int tmp; // scratch word required by the sub256k/add256k macros
+
+ sub256k(a, b, c, borrow, tmp);
+
+ if (borrow) { // the difference wrapped below zero, so add P back
+ borrow = 0; // reused as the carry-in of the addition
+ add256k(c, P, c, borrow, tmp);
+ }
+}
- mulModP(a.v, b.v, c.v);
+/**
+ * Multiplicative inverse mod P using Fermat's method of x^(p-2) mod p: right-to-left square-and-multiply with the bit pattern of P-2 (low words 0xFFFFFFFE 0xFFFFFC2D) hard-coded. The result overwrites x.
+ */
+void invModP256k(unsigned int x[8])
+{
+ __private unsigned int y[8] = {0, 0, 0, 0, 0, 0, 0, 1}; // accumulator, starts at 1
+// mulModP(x, y, y) multiplies the current power of x into y (exponent bit set); mulModP(x, x, x) squares x to advance one bit.
+ mulModP(x, y, y); // bit 0 set
+ mulModP(x, x, x);
+ mulModP(x, x, x); // bit 1 clear: square only
+ mulModP(x, y, y); // bit 2 set
+ mulModP(x, x, x);
+ mulModP(x, y, y); // bit 3 set
+ mulModP(x, x, x);
+ mulModP(x, x, x); // bit 4 clear: square only
+ mulModP(x, y, y); // bit 5 set
+// Bits 6..9 are clear: five squarings advance x from bit 5 to bit 10.
+ for(int i = 0; i < 5; i++) {
+ mulModP(x, x, x);
+ }
- return c;
-}
+ for(int i = 0; i < 22; i++) { // bits 10..31 are all set
+ mulModP(x, y, y);
+ mulModP(x, x, x);
+ }
+ mulModP(x, x, x); // bit 32 clear: square only
-uint256_t squareModP256k(uint256_t a)
-{
- uint256_t b;
- mulModP(a.v, a.v, b.v);
+ for(int i = 0; i < 222; i++) { // bits 33..254 are all set
+ mulModP(x, y, y);
+ mulModP(x, x, x);
+ }
- return b;
+ mulModP(x, y, x); // bit 255 set: final multiply, stored back into x
}
-
-/**
- * Multiplicative inverse mod P using Fermat's method of x^(p-2) mod p and addition chains
- */
-uint256_t invModP256k(uint256_t value)
+void addModP256k(const unsigned int a[8], const unsigned int b[8], unsigned int c[8])
{
- uint256_t x = value;
-
-
- //unsigned int y[8] = { 0, 0, 0, 0, 0, 0, 0, 1 };
- uint256_t y = {{0, 0, 0, 0, 0, 0, 0, 1}};
-
- // 0xd - 1101
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- //y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
-
- // 0x2 - 0010
- //y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- //y = mulModP256k(x, y);
- x = squareModP256k(x);
- //y = mulModP256k(x, y);
- x = squareModP256k(x);
-
- // 0xc = 0x1100
- //y = mulModP256k(x, y);
- x = squareModP256k(x);
- //y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
-
-
- // 0xfffff
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
-
-
- // 0xe - 1110
- //y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- // 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffff
- for(int i = 0; i < 219; i++) {
- y = mulModP256k(x, y);
- x = squareModP256k(x);
- }
- y = mulModP256k(x, y);
+ __private unsigned int borrow = 0;
+ __private unsigned int carry = 0;
+ __private unsigned int tmp = 0;
- return y;
-}
+ add256k(a, b, c, carry, tmp);
+
+ if(carry) { sub256k(c, P, c, borrow, tmp); }
+ else if(c[0] > P[0]) { sub256k(c, P, c, borrow, tmp); }
+ else if(c[0] < P[0]) { }
-void beginBatchAdd256k(uint256_t px, uint256_t x, __global uint256_t* chain, int i, int batchIdx, uint256_t* inverse)
+ else if(c[1] > P[1]) { sub256k(c, P, c, borrow, tmp); }
+ else if(c[1] < P[1]) { }
+
+ else if(c[2] > P[2]) { sub256k(c, P, c, borrow, tmp); }
+ else if(c[2] < P[2]) { }
+
+ else if(c[3] > P[3]) { sub256k(c, P, c, borrow, tmp); }
+ else if(c[3] < P[3]) { }
+
+ else if(c[4] > P[4]) { sub256k(c, P, c, borrow, tmp); }
+ else if(c[4] < P[4]) { }
+
+ else if(c[5] > P[5]) { sub256k(c, P, c, borrow, tmp); }
+ else if(c[5] < P[5]) { }
+
+ else if(c[6] > P[6]) { sub256k(c, P, c, borrow, tmp); }
+ else if(c[6] < P[6]) { }
+
+ else if(c[7] > P[7]) { sub256k(c, P, c, borrow, tmp); }
+}
+
+void doBatchInverse256k(unsigned int x[8])
{
- int gid = get_local_size(0) * get_group_id(0) + get_local_id(0);
- int dim = get_global_size(0);
+ invModP256k(x);
+}
+
+void beginBatchAdd256k(
+ const uint256_t px,
+ const uint256_t x,
+ __global uint256_t* chain,
+ const int i,
+ const int batchIdx,
+ uint256_t* inverse
+) {
+ __private int gid = get_local_size(0) * get_group_id(0) + get_local_id(0);
+ __private int dim = get_global_size(0);
+
+ __private unsigned int t[8];
// x = Gx - x
- uint256_t t = subModP256k(px, x);
+ subModP256k(px.v, x.v, t);
// Keep a chain of multiples of the diff, i.e. c[0] = diff0, c[1] = diff0 * diff1,
// c[2] = diff2 * diff1 * diff0, etc
- *inverse = mulModP256k(*inverse, t);
+ mulModP(inverse->v, t, inverse->v);
chain[batchIdx * dim + gid] = *inverse;
}
-
-void beginBatchAddWithDouble256k(uint256_t px, uint256_t py, __global uint256_t* xPtr, __global uint256_t* chain, int i, int batchIdx, uint256_t* inverse)
-{
- int gid = get_local_size(0) * get_group_id(0) + get_local_id(0);
- int dim = get_global_size(0);
-
- uint256_t x = xPtr[i];
-
- if(equal256k(px, x)) {
- x = addModP256k(py, py);
+void beginBatchAddWithDouble256k(
+ const uint256_t px,
+ const uint256_t py,
+ __global uint256_t* xPtr,
+ __global uint256_t* chain,
+ const int i,
+ const int batchIdx,
+ uint256_t* inverse
+) {
+ __private int gid = get_local_size(0) * get_group_id(0) + get_local_id(0);
+ __private int dim = get_global_size(0);
+ __private uint256_t x = xPtr[i];
+
+ if(equal256k(px.v, x.v)) {
+ addModP256k(py.v,py.v, x.v);
} else {
// x = Gx - x
- x = subModP256k(px, x);
+ subModP256k(px.v, x.v, x.v);
}
// Keep a chain of multiples of the diff, i.e. c[0] = diff0, c[1] = diff0 * diff1,
// c[2] = diff2 * diff1 * diff0, etc
- *inverse = mulModP256k(x, *inverse);
+ mulModP(x.v, inverse->v, inverse->v);
chain[batchIdx * dim + gid] = *inverse;
}
+void completeBatchAdd256k(
+ const uint256_t px,
+ const uint256_t py,
+ __global uint256_t* xPtr,
+ __global uint256_t* yPtr,
+ const int i,
+ const int batchIdx,
+ __global uint256_t* chain,
+ uint256_t* inverse,
+ uint256_t* newX,
+ uint256_t* newY)
+{
+ __private int gid = get_local_size(0) * get_group_id(0) + get_local_id(0);
+ __private int dim = get_global_size(0);
+ __private uint256_t x = xPtr[i];
+ __private uint256_t y = yPtr[i];
+
+ uint256_t s;
+ __private unsigned int tmp[8];
+
+ if(batchIdx != 0) {
+ uint256_t c;
+
+ c = chain[(batchIdx - 1) * dim + gid];
+ mulModP(inverse->v, c.v, s.v);
+
+ subModP256k(px.v, x.v, tmp);
+ mulModP(tmp, inverse->v, inverse->v);
+ } else {
+ s = *inverse;
+ }
+
+ subModP256k(py.v, y.v, tmp);
+
+ mulModP(tmp, s.v, s.v);
+
+ // Rx = s^2 - Gx - Qx
+ mulModP(s.v, s.v, tmp);
+
+ subModP256k(tmp, px.v, newX->v);
+ subModP256k(newX->v, x.v, newX->v);
+
+ // Ry = s(px - rx) - py
+ subModP256k(px.v, newX->v, tmp);
+ mulModP(s.v, tmp, newY->v);
+ subModP256k(newY->v, py.v, newY->v);
+}
void completeBatchAddWithDouble256k(
- uint256_t px,
- uint256_t py,
+ const uint256_t px,
+ const uint256_t py,
__global const uint256_t* xPtr,
__global const uint256_t* yPtr,
- int i,
- int batchIdx,
+ const int i,
+ const int batchIdx,
__global uint256_t* chain,
uint256_t* inverse,
uint256_t* newX,
uint256_t* newY)
{
- int gid = get_local_size(0) * get_group_id(0) + get_local_id(0);
- int dim = get_global_size(0);
- uint256_t s;
- uint256_t x;
- uint256_t y;
+ __private int gid = get_local_size(0) * get_group_id(0) + get_local_id(0);
+ __private int dim = get_global_size(0);
+ __private uint256_t s;
+ __private uint256_t x;
+ __private uint256_t y;
x = xPtr[i];
y = yPtr[i];
if(batchIdx >= 1) {
- uint256_t c;
+ __private uint256_t c;
c = chain[(batchIdx - 1) * dim + gid];
- s = mulModP256k(*inverse, c);
+ mulModP(inverse->v, c.v, s.v);
uint256_t diff;
- if(equal256k(px, x)) {
- diff = addModP256k(py, py);
+ if(equal256k(px.v, x.v)) {
+ addModP256k(py.v, py.v, diff.v);
} else {
- diff = subModP256k(px, x);
+ subModP256k(px.v, x.v, diff.v);
}
- *inverse = mulModP256k(diff, *inverse);
+ mulModP(diff.v, inverse->v, inverse->v);
} else {
s = *inverse;
}
- if(equal256k(px, x)) {
+ if(equal256k(px.v, x.v)) {
// currently s = 1 / 2y
- uint256_t x2;
- uint256_t tx2;
- uint256_t x3;
+ __private uint256_t x2;
+ __private uint256_t tx2;
// 3x^2
- x2 = mulModP256k(x, x);
- tx2 = addModP256k(x2, x2);
- tx2 = addModP256k(x2, tx2);
+ mulModP(x.v, x.v, x2.v);
+ addModP256k(x2.v, x2.v, tx2.v);
+ addModP256k(x2.v, tx2.v, tx2.v);
// s = 3x^2 * 1/2y
- s = mulModP256k(tx2, s);
+ mulModP(tx2.v, s.v, s.v);
// s^2
- uint256_t s2;
- s2 = mulModP256k(s, s);
+ __private uint256_t s2;
+ mulModP(s.v, s.v, s2.v);
// Rx = s^2 - 2px
- *newX = subModP256k(s2, x);
- *newX = subModP256k(*newX, x);
+ subModP256k(s2.v, x.v, newX->v);
+ subModP256k(newX->v, x.v, newX->v);
// Ry = s(px - rx) - py
- uint256_t k;
- k = subModP256k(px, *newX);
- *newY = mulModP256k(s, k);
- *newY = subModP256k(*newY, py);
+ __private uint256_t k;
+ subModP256k(px.v, newX->v, k.v);
+ mulModP(s.v, k.v, newY->v);
+ subModP256k(newY->v, py.v,newY->v);
} else {
- uint256_t rise;
- rise = subModP256k(py, y);
+ __private uint256_t rise;
+ subModP256k(py.v, y.v, rise.v);
- s = mulModP256k(rise, s);
+ mulModP(rise.v, s.v, s.v);
// Rx = s^2 - Gx - Qx
- uint256_t s2;
- s2 = mulModP256k(s, s);
+ __private uint256_t s2;
+ mulModP(s.v, s.v, s2.v);
- *newX = subModP256k(s2, px);
- *newX = subModP256k(*newX, x);
+ subModP256k(s2.v, px.v, newX->v);
+ subModP256k(newX->v, x.v,newX->v);
// Ry = s(px - rx) - py
- uint256_t k;
- k = subModP256k(px, *newX);
- *newY = mulModP256k(s, k);
- *newY = subModP256k(*newY, py);
+ __private uint256_t k;
+ subModP256k(px.v, newX->v, k.v);
+ mulModP(s.v, k.v, newY->v);
+ subModP256k(newY->v, py.v, newY->v);
}
}
-
-void completeBatchAdd256k(
- uint256_t px,
- uint256_t py,
- __global uint256_t* xPtr,
- __global uint256_t* yPtr,
- int i,
- int batchIdx,
- __global uint256_t* chain,
- uint256_t* inverse,
- uint256_t* newX,
- uint256_t* newY)
-{
- int gid = get_local_size(0) * get_group_id(0) + get_local_id(0);
- int dim = get_global_size(0);
-
- uint256_t s;
- uint256_t x;
-
- x = xPtr[i];
-
- if(batchIdx >= 1) {
- uint256_t c;
-
- c = chain[(batchIdx - 1) * dim + gid];
- s = mulModP256k(*inverse, c);
-
- uint256_t diff;
- diff = subModP256k(px, x);
- *inverse = mulModP256k(diff, *inverse);
- } else {
- s = *inverse;
- }
-
- uint256_t y;
- y = yPtr[i];
-
- uint256_t rise;
- rise = subModP256k(py, y);
-
- s = mulModP256k(rise, s);
-
- // Rx = s^2 - Gx - Qx
- uint256_t s2;
- s2 = mulModP256k(s, s);
-
- *newX = subModP256k(s2, px);
- *newX = subModP256k(*newX, x);
-
- // Ry = s(px - rx) - py
- uint256_t k;
- k = subModP256k(px, *newX);
- *newY = mulModP256k(s, k);
- *newY = subModP256k(*newY, py);
-}
-
-
-uint256_t doBatchInverse256k(uint256_t x)
+unsigned int readWord256k(__global const uint256_t* ara, const int idx, const int word)
{
- return invModP256k(x);
+ return ara[idx].v[word];
}
#endif
diff --git a/clMath/sha256.cl b/clMath/sha256.cl
index 7cd26ff..4c8ffd6 100644
--- a/clMath/sha256.cl
+++ b/clMath/sha256.cl
@@ -1,6 +1,5 @@
-#ifndef _SHA256_CL
-#define _SHA256_CL
-
+#ifndef SHA256_CL
+#define SHA256_CL
__constant unsigned int _K[64] = {
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
@@ -26,7 +25,6 @@ __constant unsigned int _IV[8] = {
#define rotr(x, n) ((x) >> (n)) ^ ((x) << (32 - (n)))
-
#define MAJ(a, b, c) (((a) & (b)) ^ ((a) & (c)) ^ ((b) & (c)))
#define CH(e, f, g) (((e) & (f)) ^ (~(e) & (g)))
@@ -35,17 +33,25 @@ __constant unsigned int _IV[8] = {
#define s1(x) (rotr((x), 17) ^ rotr((x), 19) ^ ((x) >> 10))
-#define round(a, b, c, d, e, f, g, h, m, k)\
+#define roundSha(a, b, c, d, e, f, g, h, m, k)\
t = CH((e), (f), (g)) + (rotr((e), 6) ^ rotr((e), 11) ^ rotr((e), 25)) + (k) + (m);\
(d) += (t) + (h);\
(h) += (t) + MAJ((a), (b), (c)) + (rotr((a), 2) ^ rotr((a), 13) ^ rotr((a), 22))
-
void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned int digest[8])
{
- unsigned int a, b, c, d, e, f, g, h;
- unsigned int w[16];
- unsigned int t;
+ __private unsigned int a, b, c, d, e, f, g, h;
+ __private unsigned int w[16];
+ __private unsigned int t;
+
+ a = _IV[0];
+ b = _IV[1];
+ c = _IV[2];
+ d = _IV[3];
+ e = _IV[4];
+ f = _IV[5];
+ g = _IV[6];
+ h = _IV[7];
// 0x04 || x || y
w[0] = (x[0] >> 8) | 0x04000000;
@@ -65,31 +71,22 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned
w[14] = (y[6] >> 8) | (y[5] << 24);
w[15] = (y[7] >> 8) | (y[6] << 24);
- a = _IV[0];
- b = _IV[1];
- c = _IV[2];
- d = _IV[3];
- e = _IV[4];
- f = _IV[5];
- g = _IV[6];
- h = _IV[7];
-
- round(a, b, c, d, e, f, g, h, w[0], _K[0]);
- round(h, a, b, c, d, e, f, g, w[1], _K[1]);
- round(g, h, a, b, c, d, e, f, w[2], _K[2]);
- round(f, g, h, a, b, c, d, e, w[3], _K[3]);
- round(e, f, g, h, a, b, c, d, w[4], _K[4]);
- round(d, e, f, g, h, a, b, c, w[5], _K[5]);
- round(c, d, e, f, g, h, a, b, w[6], _K[6]);
- round(b, c, d, e, f, g, h, a, w[7], _K[7]);
- round(a, b, c, d, e, f, g, h, w[8], _K[8]);
- round(h, a, b, c, d, e, f, g, w[9], _K[9]);
- round(g, h, a, b, c, d, e, f, w[10], _K[10]);
- round(f, g, h, a, b, c, d, e, w[11], _K[11]);
- round(e, f, g, h, a, b, c, d, w[12], _K[12]);
- round(d, e, f, g, h, a, b, c, w[13], _K[13]);
- round(c, d, e, f, g, h, a, b, w[14], _K[14]);
- round(b, c, d, e, f, g, h, a, w[15], _K[15]);
+ roundSha(a, b, c, d, e, f, g, h, w[0], _K[0]);
+ roundSha(h, a, b, c, d, e, f, g, w[1], _K[1]);
+ roundSha(g, h, a, b, c, d, e, f, w[2], _K[2]);
+ roundSha(f, g, h, a, b, c, d, e, w[3], _K[3]);
+ roundSha(e, f, g, h, a, b, c, d, w[4], _K[4]);
+ roundSha(d, e, f, g, h, a, b, c, w[5], _K[5]);
+ roundSha(c, d, e, f, g, h, a, b, w[6], _K[6]);
+ roundSha(b, c, d, e, f, g, h, a, w[7], _K[7]);
+ roundSha(a, b, c, d, e, f, g, h, w[8], _K[8]);
+ roundSha(h, a, b, c, d, e, f, g, w[9], _K[9]);
+ roundSha(g, h, a, b, c, d, e, f, w[10], _K[10]);
+ roundSha(f, g, h, a, b, c, d, e, w[11], _K[11]);
+ roundSha(e, f, g, h, a, b, c, d, w[12], _K[12]);
+ roundSha(d, e, f, g, h, a, b, c, w[13], _K[13]);
+ roundSha(c, d, e, f, g, h, a, b, w[14], _K[14]);
+ roundSha(b, c, d, e, f, g, h, a, w[15], _K[15]);
w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]);
@@ -108,22 +105,22 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned
w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
- round(a, b, c, d, e, f, g, h, w[0], _K[16]);
- round(h, a, b, c, d, e, f, g, w[1], _K[17]);
- round(g, h, a, b, c, d, e, f, w[2], _K[18]);
- round(f, g, h, a, b, c, d, e, w[3], _K[19]);
- round(e, f, g, h, a, b, c, d, w[4], _K[20]);
- round(d, e, f, g, h, a, b, c, w[5], _K[21]);
- round(c, d, e, f, g, h, a, b, w[6], _K[22]);
- round(b, c, d, e, f, g, h, a, w[7], _K[23]);
- round(a, b, c, d, e, f, g, h, w[8], _K[24]);
- round(h, a, b, c, d, e, f, g, w[9], _K[25]);
- round(g, h, a, b, c, d, e, f, w[10], _K[26]);
- round(f, g, h, a, b, c, d, e, w[11], _K[27]);
- round(e, f, g, h, a, b, c, d, w[12], _K[28]);
- round(d, e, f, g, h, a, b, c, w[13], _K[29]);
- round(c, d, e, f, g, h, a, b, w[14], _K[30]);
- round(b, c, d, e, f, g, h, a, w[15], _K[31]);
+ roundSha(a, b, c, d, e, f, g, h, w[0], _K[16]);
+ roundSha(h, a, b, c, d, e, f, g, w[1], _K[17]);
+ roundSha(g, h, a, b, c, d, e, f, w[2], _K[18]);
+ roundSha(f, g, h, a, b, c, d, e, w[3], _K[19]);
+ roundSha(e, f, g, h, a, b, c, d, w[4], _K[20]);
+ roundSha(d, e, f, g, h, a, b, c, w[5], _K[21]);
+ roundSha(c, d, e, f, g, h, a, b, w[6], _K[22]);
+ roundSha(b, c, d, e, f, g, h, a, w[7], _K[23]);
+ roundSha(a, b, c, d, e, f, g, h, w[8], _K[24]);
+ roundSha(h, a, b, c, d, e, f, g, w[9], _K[25]);
+ roundSha(g, h, a, b, c, d, e, f, w[10], _K[26]);
+ roundSha(f, g, h, a, b, c, d, e, w[11], _K[27]);
+ roundSha(e, f, g, h, a, b, c, d, w[12], _K[28]);
+ roundSha(d, e, f, g, h, a, b, c, w[13], _K[29]);
+ roundSha(c, d, e, f, g, h, a, b, w[14], _K[30]);
+ roundSha(b, c, d, e, f, g, h, a, w[15], _K[31]);
w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]);
@@ -142,22 +139,22 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned
w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
- round(a, b, c, d, e, f, g, h, w[0], _K[32]);
- round(h, a, b, c, d, e, f, g, w[1], _K[33]);
- round(g, h, a, b, c, d, e, f, w[2], _K[34]);
- round(f, g, h, a, b, c, d, e, w[3], _K[35]);
- round(e, f, g, h, a, b, c, d, w[4], _K[36]);
- round(d, e, f, g, h, a, b, c, w[5], _K[37]);
- round(c, d, e, f, g, h, a, b, w[6], _K[38]);
- round(b, c, d, e, f, g, h, a, w[7], _K[39]);
- round(a, b, c, d, e, f, g, h, w[8], _K[40]);
- round(h, a, b, c, d, e, f, g, w[9], _K[41]);
- round(g, h, a, b, c, d, e, f, w[10], _K[42]);
- round(f, g, h, a, b, c, d, e, w[11], _K[43]);
- round(e, f, g, h, a, b, c, d, w[12], _K[44]);
- round(d, e, f, g, h, a, b, c, w[13], _K[45]);
- round(c, d, e, f, g, h, a, b, w[14], _K[46]);
- round(b, c, d, e, f, g, h, a, w[15], _K[47]);
+ roundSha(a, b, c, d, e, f, g, h, w[0], _K[32]);
+ roundSha(h, a, b, c, d, e, f, g, w[1], _K[33]);
+ roundSha(g, h, a, b, c, d, e, f, w[2], _K[34]);
+ roundSha(f, g, h, a, b, c, d, e, w[3], _K[35]);
+ roundSha(e, f, g, h, a, b, c, d, w[4], _K[36]);
+ roundSha(d, e, f, g, h, a, b, c, w[5], _K[37]);
+ roundSha(c, d, e, f, g, h, a, b, w[6], _K[38]);
+ roundSha(b, c, d, e, f, g, h, a, w[7], _K[39]);
+ roundSha(a, b, c, d, e, f, g, h, w[8], _K[40]);
+ roundSha(h, a, b, c, d, e, f, g, w[9], _K[41]);
+ roundSha(g, h, a, b, c, d, e, f, w[10], _K[42]);
+ roundSha(f, g, h, a, b, c, d, e, w[11], _K[43]);
+ roundSha(e, f, g, h, a, b, c, d, w[12], _K[44]);
+ roundSha(d, e, f, g, h, a, b, c, w[13], _K[45]);
+ roundSha(c, d, e, f, g, h, a, b, w[14], _K[46]);
+ roundSha(b, c, d, e, f, g, h, a, w[15], _K[47]);
w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]);
@@ -176,22 +173,22 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned
w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
- round(a, b, c, d, e, f, g, h, w[0], _K[48]);
- round(h, a, b, c, d, e, f, g, w[1], _K[49]);
- round(g, h, a, b, c, d, e, f, w[2], _K[50]);
- round(f, g, h, a, b, c, d, e, w[3], _K[51]);
- round(e, f, g, h, a, b, c, d, w[4], _K[52]);
- round(d, e, f, g, h, a, b, c, w[5], _K[53]);
- round(c, d, e, f, g, h, a, b, w[6], _K[54]);
- round(b, c, d, e, f, g, h, a, w[7], _K[55]);
- round(a, b, c, d, e, f, g, h, w[8], _K[56]);
- round(h, a, b, c, d, e, f, g, w[9], _K[57]);
- round(g, h, a, b, c, d, e, f, w[10], _K[58]);
- round(f, g, h, a, b, c, d, e, w[11], _K[59]);
- round(e, f, g, h, a, b, c, d, w[12], _K[60]);
- round(d, e, f, g, h, a, b, c, w[13], _K[61]);
- round(c, d, e, f, g, h, a, b, w[14], _K[62]);
- round(b, c, d, e, f, g, h, a, w[15], _K[63]);
+ roundSha(a, b, c, d, e, f, g, h, w[0], _K[48]);
+ roundSha(h, a, b, c, d, e, f, g, w[1], _K[49]);
+ roundSha(g, h, a, b, c, d, e, f, w[2], _K[50]);
+ roundSha(f, g, h, a, b, c, d, e, w[3], _K[51]);
+ roundSha(e, f, g, h, a, b, c, d, w[4], _K[52]);
+ roundSha(d, e, f, g, h, a, b, c, w[5], _K[53]);
+ roundSha(c, d, e, f, g, h, a, b, w[6], _K[54]);
+ roundSha(b, c, d, e, f, g, h, a, w[7], _K[55]);
+ roundSha(a, b, c, d, e, f, g, h, w[8], _K[56]);
+ roundSha(h, a, b, c, d, e, f, g, w[9], _K[57]);
+ roundSha(g, h, a, b, c, d, e, f, w[10], _K[58]);
+ roundSha(f, g, h, a, b, c, d, e, w[11], _K[59]);
+ roundSha(e, f, g, h, a, b, c, d, w[12], _K[60]);
+ roundSha(d, e, f, g, h, a, b, c, w[13], _K[61]);
+ roundSha(c, d, e, f, g, h, a, b, w[14], _K[62]);
+ roundSha(b, c, d, e, f, g, h, a, w[15], _K[63]);
a += _IV[0];
b += _IV[1];
@@ -203,35 +200,34 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned
h += _IV[7];
// store the intermediate hash value
- unsigned int tmp[8];
- tmp[0] = a;
- tmp[1] = b;
- tmp[2] = c;
- tmp[3] = d;
- tmp[4] = e;
- tmp[5] = f;
- tmp[6] = g;
- tmp[7] = h;
+ digest[0] = a;
+ digest[1] = b;
+ digest[2] = c;
+ digest[3] = d;
+ digest[4] = e;
+ digest[5] = f;
+ digest[6] = g;
+ digest[7] = h;
w[0] = (y[7] << 24) | 0x00800000;
- w[15] = 65 * 8;
-
- round(a, b, c, d, e, f, g, h, w[0], _K[0]);
- round(h, a, b, c, d, e, f, g, 0, _K[1]);
- round(g, h, a, b, c, d, e, f, 0, _K[2]);
- round(f, g, h, a, b, c, d, e, 0, _K[3]);
- round(e, f, g, h, a, b, c, d, 0, _K[4]);
- round(d, e, f, g, h, a, b, c, 0, _K[5]);
- round(c, d, e, f, g, h, a, b, 0, _K[6]);
- round(b, c, d, e, f, g, h, a, 0, _K[7]);
- round(a, b, c, d, e, f, g, h, 0, _K[8]);
- round(h, a, b, c, d, e, f, g, 0, _K[9]);
- round(g, h, a, b, c, d, e, f, 0, _K[10]);
- round(f, g, h, a, b, c, d, e, 0, _K[11]);
- round(e, f, g, h, a, b, c, d, 0, _K[12]);
- round(d, e, f, g, h, a, b, c, 0, _K[13]);
- round(c, d, e, f, g, h, a, b, 0, _K[14]);
- round(b, c, d, e, f, g, h, a, w[15], _K[15]);
+ w[15] = 520; // 65 * 8
+
+ roundSha(a, b, c, d, e, f, g, h, w[0], _K[0]);
+ roundSha(h, a, b, c, d, e, f, g, 0, _K[1]);
+ roundSha(g, h, a, b, c, d, e, f, 0, _K[2]);
+ roundSha(f, g, h, a, b, c, d, e, 0, _K[3]);
+ roundSha(e, f, g, h, a, b, c, d, 0, _K[4]);
+ roundSha(d, e, f, g, h, a, b, c, 0, _K[5]);
+ roundSha(c, d, e, f, g, h, a, b, 0, _K[6]);
+ roundSha(b, c, d, e, f, g, h, a, 0, _K[7]);
+ roundSha(a, b, c, d, e, f, g, h, 0, _K[8]);
+ roundSha(h, a, b, c, d, e, f, g, 0, _K[9]);
+ roundSha(g, h, a, b, c, d, e, f, 0, _K[10]);
+ roundSha(f, g, h, a, b, c, d, e, 0, _K[11]);
+ roundSha(e, f, g, h, a, b, c, d, 0, _K[12]);
+ roundSha(d, e, f, g, h, a, b, c, 0, _K[13]);
+ roundSha(c, d, e, f, g, h, a, b, 0, _K[14]);
+ roundSha(b, c, d, e, f, g, h, a, w[15], _K[15]);
w[0] = w[0] + s0(0) + 0 + s1(0);
w[1] = 0 + s0(0) + 0 + s1(w[15]);
@@ -250,22 +246,22 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned
w[14] = 0 + s0(w[15]) + w[7] + s1(w[12]);
w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
- round(a, b, c, d, e, f, g, h, w[0], _K[16]);
- round(h, a, b, c, d, e, f, g, w[1], _K[17]);
- round(g, h, a, b, c, d, e, f, w[2], _K[18]);
- round(f, g, h, a, b, c, d, e, w[3], _K[19]);
- round(e, f, g, h, a, b, c, d, w[4], _K[20]);
- round(d, e, f, g, h, a, b, c, w[5], _K[21]);
- round(c, d, e, f, g, h, a, b, w[6], _K[22]);
- round(b, c, d, e, f, g, h, a, w[7], _K[23]);
- round(a, b, c, d, e, f, g, h, w[8], _K[24]);
- round(h, a, b, c, d, e, f, g, w[9], _K[25]);
- round(g, h, a, b, c, d, e, f, w[10], _K[26]);
- round(f, g, h, a, b, c, d, e, w[11], _K[27]);
- round(e, f, g, h, a, b, c, d, w[12], _K[28]);
- round(d, e, f, g, h, a, b, c, w[13], _K[29]);
- round(c, d, e, f, g, h, a, b, w[14], _K[30]);
- round(b, c, d, e, f, g, h, a, w[15], _K[31]);
+ roundSha(a, b, c, d, e, f, g, h, w[0], _K[16]);
+ roundSha(h, a, b, c, d, e, f, g, w[1], _K[17]);
+ roundSha(g, h, a, b, c, d, e, f, w[2], _K[18]);
+ roundSha(f, g, h, a, b, c, d, e, w[3], _K[19]);
+ roundSha(e, f, g, h, a, b, c, d, w[4], _K[20]);
+ roundSha(d, e, f, g, h, a, b, c, w[5], _K[21]);
+ roundSha(c, d, e, f, g, h, a, b, w[6], _K[22]);
+ roundSha(b, c, d, e, f, g, h, a, w[7], _K[23]);
+ roundSha(a, b, c, d, e, f, g, h, w[8], _K[24]);
+ roundSha(h, a, b, c, d, e, f, g, w[9], _K[25]);
+ roundSha(g, h, a, b, c, d, e, f, w[10], _K[26]);
+ roundSha(f, g, h, a, b, c, d, e, w[11], _K[27]);
+ roundSha(e, f, g, h, a, b, c, d, w[12], _K[28]);
+ roundSha(d, e, f, g, h, a, b, c, w[13], _K[29]);
+ roundSha(c, d, e, f, g, h, a, b, w[14], _K[30]);
+ roundSha(b, c, d, e, f, g, h, a, w[15], _K[31]);
w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]);
@@ -284,22 +280,22 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned
w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
- round(a, b, c, d, e, f, g, h, w[0], _K[32]);
- round(h, a, b, c, d, e, f, g, w[1], _K[33]);
- round(g, h, a, b, c, d, e, f, w[2], _K[34]);
- round(f, g, h, a, b, c, d, e, w[3], _K[35]);
- round(e, f, g, h, a, b, c, d, w[4], _K[36]);
- round(d, e, f, g, h, a, b, c, w[5], _K[37]);
- round(c, d, e, f, g, h, a, b, w[6], _K[38]);
- round(b, c, d, e, f, g, h, a, w[7], _K[39]);
- round(a, b, c, d, e, f, g, h, w[8], _K[40]);
- round(h, a, b, c, d, e, f, g, w[9], _K[41]);
- round(g, h, a, b, c, d, e, f, w[10], _K[42]);
- round(f, g, h, a, b, c, d, e, w[11], _K[43]);
- round(e, f, g, h, a, b, c, d, w[12], _K[44]);
- round(d, e, f, g, h, a, b, c, w[13], _K[45]);
- round(c, d, e, f, g, h, a, b, w[14], _K[46]);
- round(b, c, d, e, f, g, h, a, w[15], _K[47]);
+ roundSha(a, b, c, d, e, f, g, h, w[0], _K[32]);
+ roundSha(h, a, b, c, d, e, f, g, w[1], _K[33]);
+ roundSha(g, h, a, b, c, d, e, f, w[2], _K[34]);
+ roundSha(f, g, h, a, b, c, d, e, w[3], _K[35]);
+ roundSha(e, f, g, h, a, b, c, d, w[4], _K[36]);
+ roundSha(d, e, f, g, h, a, b, c, w[5], _K[37]);
+ roundSha(c, d, e, f, g, h, a, b, w[6], _K[38]);
+ roundSha(b, c, d, e, f, g, h, a, w[7], _K[39]);
+ roundSha(a, b, c, d, e, f, g, h, w[8], _K[40]);
+ roundSha(h, a, b, c, d, e, f, g, w[9], _K[41]);
+ roundSha(g, h, a, b, c, d, e, f, w[10], _K[42]);
+ roundSha(f, g, h, a, b, c, d, e, w[11], _K[43]);
+ roundSha(e, f, g, h, a, b, c, d, w[12], _K[44]);
+ roundSha(d, e, f, g, h, a, b, c, w[13], _K[45]);
+ roundSha(c, d, e, f, g, h, a, b, w[14], _K[46]);
+ roundSha(b, c, d, e, f, g, h, a, w[15], _K[47]);
w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]);
@@ -318,38 +314,38 @@ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned
w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
- round(a, b, c, d, e, f, g, h, w[0], _K[48]);
- round(h, a, b, c, d, e, f, g, w[1], _K[49]);
- round(g, h, a, b, c, d, e, f, w[2], _K[50]);
- round(f, g, h, a, b, c, d, e, w[3], _K[51]);
- round(e, f, g, h, a, b, c, d, w[4], _K[52]);
- round(d, e, f, g, h, a, b, c, w[5], _K[53]);
- round(c, d, e, f, g, h, a, b, w[6], _K[54]);
- round(b, c, d, e, f, g, h, a, w[7], _K[55]);
- round(a, b, c, d, e, f, g, h, w[8], _K[56]);
- round(h, a, b, c, d, e, f, g, w[9], _K[57]);
- round(g, h, a, b, c, d, e, f, w[10], _K[58]);
- round(f, g, h, a, b, c, d, e, w[11], _K[59]);
- round(e, f, g, h, a, b, c, d, w[12], _K[60]);
- round(d, e, f, g, h, a, b, c, w[13], _K[61]);
- round(c, d, e, f, g, h, a, b, w[14], _K[62]);
- round(b, c, d, e, f, g, h, a, w[15], _K[63]);
-
- digest[0] = tmp[0] + a;
- digest[1] = tmp[1] + b;
- digest[2] = tmp[2] + c;
- digest[3] = tmp[3] + d;
- digest[4] = tmp[4] + e;
- digest[5] = tmp[5] + f;
- digest[6] = tmp[6] + g;
- digest[7] = tmp[7] + h;
+ roundSha(a, b, c, d, e, f, g, h, w[0], _K[48]);
+ roundSha(h, a, b, c, d, e, f, g, w[1], _K[49]);
+ roundSha(g, h, a, b, c, d, e, f, w[2], _K[50]);
+ roundSha(f, g, h, a, b, c, d, e, w[3], _K[51]);
+ roundSha(e, f, g, h, a, b, c, d, w[4], _K[52]);
+ roundSha(d, e, f, g, h, a, b, c, w[5], _K[53]);
+ roundSha(c, d, e, f, g, h, a, b, w[6], _K[54]);
+ roundSha(b, c, d, e, f, g, h, a, w[7], _K[55]);
+ roundSha(a, b, c, d, e, f, g, h, w[8], _K[56]);
+ roundSha(h, a, b, c, d, e, f, g, w[9], _K[57]);
+ roundSha(g, h, a, b, c, d, e, f, w[10], _K[58]);
+ roundSha(f, g, h, a, b, c, d, e, w[11], _K[59]);
+ roundSha(e, f, g, h, a, b, c, d, w[12], _K[60]);
+ roundSha(d, e, f, g, h, a, b, c, w[13], _K[61]);
+ roundSha(c, d, e, f, g, h, a, b, w[14], _K[62]);
+ roundSha(b, c, d, e, f, g, h, a, w[15], _K[63]);
+
+ digest[0] += a;
+ digest[1] += b;
+ digest[2] += c;
+ digest[3] += d;
+ digest[4] += e;
+ digest[5] += f;
+ digest[6] += g;
+ digest[7] += h;
}
void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, unsigned int digest[8])
{
- unsigned int a, b, c, d, e, f, g, h;
- unsigned int w[16];
- unsigned int t;
+ __private unsigned int a, b, c, d, e, f, g, h;
+ __private unsigned int w[16];
+ __private unsigned int t;
// 0x03 || x or 0x02 || x
w[0] = 0x02000000 | ((yParity & 1) << 24) | (x[0] >> 8);
@@ -362,7 +358,7 @@ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, un
w[6] = (x[6] >> 8) | (x[5] << 24);
w[7] = (x[7] >> 8) | (x[6] << 24);
w[8] = (x[7] << 24) | 0x00800000;
- w[15] = 33 * 8;
+ w[15] = 264; // 33 * 8
a = _IV[0];
b = _IV[1];
@@ -373,22 +369,22 @@ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, un
g = _IV[6];
h = _IV[7];
- round(a, b, c, d, e, f, g, h, w[0], _K[0]);
- round(h, a, b, c, d, e, f, g, w[1], _K[1]);
- round(g, h, a, b, c, d, e, f, w[2], _K[2]);
- round(f, g, h, a, b, c, d, e, w[3], _K[3]);
- round(e, f, g, h, a, b, c, d, w[4], _K[4]);
- round(d, e, f, g, h, a, b, c, w[5], _K[5]);
- round(c, d, e, f, g, h, a, b, w[6], _K[6]);
- round(b, c, d, e, f, g, h, a, w[7], _K[7]);
- round(a, b, c, d, e, f, g, h, w[8], _K[8]);
- round(h, a, b, c, d, e, f, g, 0, _K[9]);
- round(g, h, a, b, c, d, e, f, 0, _K[10]);
- round(f, g, h, a, b, c, d, e, 0, _K[11]);
- round(e, f, g, h, a, b, c, d, 0, _K[12]);
- round(d, e, f, g, h, a, b, c, 0, _K[13]);
- round(c, d, e, f, g, h, a, b, 0, _K[14]);
- round(b, c, d, e, f, g, h, a, w[15], _K[15]);
+ roundSha(a, b, c, d, e, f, g, h, w[0], _K[0]);
+ roundSha(h, a, b, c, d, e, f, g, w[1], _K[1]);
+ roundSha(g, h, a, b, c, d, e, f, w[2], _K[2]);
+ roundSha(f, g, h, a, b, c, d, e, w[3], _K[3]);
+ roundSha(e, f, g, h, a, b, c, d, w[4], _K[4]);
+ roundSha(d, e, f, g, h, a, b, c, w[5], _K[5]);
+ roundSha(c, d, e, f, g, h, a, b, w[6], _K[6]);
+ roundSha(b, c, d, e, f, g, h, a, w[7], _K[7]);
+ roundSha(a, b, c, d, e, f, g, h, w[8], _K[8]);
+ roundSha(h, a, b, c, d, e, f, g, 0, _K[9]);
+ roundSha(g, h, a, b, c, d, e, f, 0, _K[10]);
+ roundSha(f, g, h, a, b, c, d, e, 0, _K[11]);
+ roundSha(e, f, g, h, a, b, c, d, 0, _K[12]);
+ roundSha(d, e, f, g, h, a, b, c, 0, _K[13]);
+ roundSha(c, d, e, f, g, h, a, b, 0, _K[14]);
+ roundSha(b, c, d, e, f, g, h, a, w[15], _K[15]);
w[0] = w[0] + s0(w[1]) + 0 + s1(0);
w[1] = w[1] + s0(w[2]) + 0 + s1(w[15]);
@@ -407,22 +403,22 @@ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, un
w[14] = 0 + s0(w[15]) + w[7] + s1(w[12]);
w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
- round(a, b, c, d, e, f, g, h, w[0], _K[16]);
- round(h, a, b, c, d, e, f, g, w[1], _K[17]);
- round(g, h, a, b, c, d, e, f, w[2], _K[18]);
- round(f, g, h, a, b, c, d, e, w[3], _K[19]);
- round(e, f, g, h, a, b, c, d, w[4], _K[20]);
- round(d, e, f, g, h, a, b, c, w[5], _K[21]);
- round(c, d, e, f, g, h, a, b, w[6], _K[22]);
- round(b, c, d, e, f, g, h, a, w[7], _K[23]);
- round(a, b, c, d, e, f, g, h, w[8], _K[24]);
- round(h, a, b, c, d, e, f, g, w[9], _K[25]);
- round(g, h, a, b, c, d, e, f, w[10], _K[26]);
- round(f, g, h, a, b, c, d, e, w[11], _K[27]);
- round(e, f, g, h, a, b, c, d, w[12], _K[28]);
- round(d, e, f, g, h, a, b, c, w[13], _K[29]);
- round(c, d, e, f, g, h, a, b, w[14], _K[30]);
- round(b, c, d, e, f, g, h, a, w[15], _K[31]);
+ roundSha(a, b, c, d, e, f, g, h, w[0], _K[16]);
+ roundSha(h, a, b, c, d, e, f, g, w[1], _K[17]);
+ roundSha(g, h, a, b, c, d, e, f, w[2], _K[18]);
+ roundSha(f, g, h, a, b, c, d, e, w[3], _K[19]);
+ roundSha(e, f, g, h, a, b, c, d, w[4], _K[20]);
+ roundSha(d, e, f, g, h, a, b, c, w[5], _K[21]);
+ roundSha(c, d, e, f, g, h, a, b, w[6], _K[22]);
+ roundSha(b, c, d, e, f, g, h, a, w[7], _K[23]);
+ roundSha(a, b, c, d, e, f, g, h, w[8], _K[24]);
+ roundSha(h, a, b, c, d, e, f, g, w[9], _K[25]);
+ roundSha(g, h, a, b, c, d, e, f, w[10], _K[26]);
+ roundSha(f, g, h, a, b, c, d, e, w[11], _K[27]);
+ roundSha(e, f, g, h, a, b, c, d, w[12], _K[28]);
+ roundSha(d, e, f, g, h, a, b, c, w[13], _K[29]);
+ roundSha(c, d, e, f, g, h, a, b, w[14], _K[30]);
+ roundSha(b, c, d, e, f, g, h, a, w[15], _K[31]);
w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]);
@@ -441,22 +437,22 @@ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, un
w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
- round(a, b, c, d, e, f, g, h, w[0], _K[32]);
- round(h, a, b, c, d, e, f, g, w[1], _K[33]);
- round(g, h, a, b, c, d, e, f, w[2], _K[34]);
- round(f, g, h, a, b, c, d, e, w[3], _K[35]);
- round(e, f, g, h, a, b, c, d, w[4], _K[36]);
- round(d, e, f, g, h, a, b, c, w[5], _K[37]);
- round(c, d, e, f, g, h, a, b, w[6], _K[38]);
- round(b, c, d, e, f, g, h, a, w[7], _K[39]);
- round(a, b, c, d, e, f, g, h, w[8], _K[40]);
- round(h, a, b, c, d, e, f, g, w[9], _K[41]);
- round(g, h, a, b, c, d, e, f, w[10], _K[42]);
- round(f, g, h, a, b, c, d, e, w[11], _K[43]);
- round(e, f, g, h, a, b, c, d, w[12], _K[44]);
- round(d, e, f, g, h, a, b, c, w[13], _K[45]);
- round(c, d, e, f, g, h, a, b, w[14], _K[46]);
- round(b, c, d, e, f, g, h, a, w[15], _K[47]);
+ roundSha(a, b, c, d, e, f, g, h, w[0], _K[32]);
+ roundSha(h, a, b, c, d, e, f, g, w[1], _K[33]);
+ roundSha(g, h, a, b, c, d, e, f, w[2], _K[34]);
+ roundSha(f, g, h, a, b, c, d, e, w[3], _K[35]);
+ roundSha(e, f, g, h, a, b, c, d, w[4], _K[36]);
+ roundSha(d, e, f, g, h, a, b, c, w[5], _K[37]);
+ roundSha(c, d, e, f, g, h, a, b, w[6], _K[38]);
+ roundSha(b, c, d, e, f, g, h, a, w[7], _K[39]);
+ roundSha(a, b, c, d, e, f, g, h, w[8], _K[40]);
+ roundSha(h, a, b, c, d, e, f, g, w[9], _K[41]);
+ roundSha(g, h, a, b, c, d, e, f, w[10], _K[42]);
+ roundSha(f, g, h, a, b, c, d, e, w[11], _K[43]);
+ roundSha(e, f, g, h, a, b, c, d, w[12], _K[44]);
+ roundSha(d, e, f, g, h, a, b, c, w[13], _K[45]);
+ roundSha(c, d, e, f, g, h, a, b, w[14], _K[46]);
+ roundSha(b, c, d, e, f, g, h, a, w[15], _K[47]);
w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
@@ -476,39 +472,30 @@ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, un
w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
- round(a, b, c, d, e, f, g, h, w[0], _K[48]);
- round(h, a, b, c, d, e, f, g, w[1], _K[49]);
- round(g, h, a, b, c, d, e, f, w[2], _K[50]);
- round(f, g, h, a, b, c, d, e, w[3], _K[51]);
- round(e, f, g, h, a, b, c, d, w[4], _K[52]);
- round(d, e, f, g, h, a, b, c, w[5], _K[53]);
- round(c, d, e, f, g, h, a, b, w[6], _K[54]);
- round(b, c, d, e, f, g, h, a, w[7], _K[55]);
- round(a, b, c, d, e, f, g, h, w[8], _K[56]);
- round(h, a, b, c, d, e, f, g, w[9], _K[57]);
- round(g, h, a, b, c, d, e, f, w[10], _K[58]);
- round(f, g, h, a, b, c, d, e, w[11], _K[59]);
- round(e, f, g, h, a, b, c, d, w[12], _K[60]);
- round(d, e, f, g, h, a, b, c, w[13], _K[61]);
- round(c, d, e, f, g, h, a, b, w[14], _K[62]);
- round(b, c, d, e, f, g, h, a, w[15], _K[63]);
-
- a += _IV[0];
- b += _IV[1];
- c += _IV[2];
- d += _IV[3];
- e += _IV[4];
- f += _IV[5];
- g += _IV[6];
- h += _IV[7];
-
- digest[0] = a;
- digest[1] = b;
- digest[2] = c;
- digest[3] = d;
- digest[4] = e;
- digest[5] = f;
- digest[6] = g;
- digest[7] = h;
+ roundSha(a, b, c, d, e, f, g, h, w[0], _K[48]);
+ roundSha(h, a, b, c, d, e, f, g, w[1], _K[49]);
+ roundSha(g, h, a, b, c, d, e, f, w[2], _K[50]);
+ roundSha(f, g, h, a, b, c, d, e, w[3], _K[51]);
+ roundSha(e, f, g, h, a, b, c, d, w[4], _K[52]);
+ roundSha(d, e, f, g, h, a, b, c, w[5], _K[53]);
+ roundSha(c, d, e, f, g, h, a, b, w[6], _K[54]);
+ roundSha(b, c, d, e, f, g, h, a, w[7], _K[55]);
+ roundSha(a, b, c, d, e, f, g, h, w[8], _K[56]);
+ roundSha(h, a, b, c, d, e, f, g, w[9], _K[57]);
+ roundSha(g, h, a, b, c, d, e, f, w[10], _K[58]);
+ roundSha(f, g, h, a, b, c, d, e, w[11], _K[59]);
+ roundSha(e, f, g, h, a, b, c, d, w[12], _K[60]);
+ roundSha(d, e, f, g, h, a, b, c, w[13], _K[61]);
+ roundSha(c, d, e, f, g, h, a, b, w[14], _K[62]);
+ roundSha(b, c, d, e, f, g, h, a, w[15], _K[63]);
+
+ digest[0] = a + _IV[0];
+ digest[1] = b + _IV[1];
+ digest[2] = c + _IV[2];
+ digest[3] = d + _IV[3];
+ digest[4] = e + _IV[4];
+ digest[5] = f + _IV[5];
+ digest[6] = g + _IV[6];
+ digest[7] = h + _IV[7];
}
#endif
diff --git a/clUtil/clContext.cpp b/clUtil/clContext.cpp
index 0a27518..a50eedb 100644
--- a/clUtil/clContext.cpp
+++ b/clUtil/clContext.cpp
@@ -15,7 +15,7 @@ cl::CLContext::CLContext(cl_device_id device)
_ctx = clCreateContext(0, 1, &_device, NULL, NULL, &err);
clCall(err);
- _queue = clCreateCommandQueue(_ctx, _device, 0, &err);
+ _queue = clCreateCommandQueueWithProperties(_ctx, _device, NULL, &err);
clCall(err);
}
@@ -94,7 +94,7 @@ cl::CLProgram::CLProgram(cl::CLContext &ctx, std::string srcFile, std::string op
cl_int err;
if(util::toLower(_ctx.getDeviceVendor()).find("intel") != std::string::npos) {
- options += "-DDEVICE_VENDOR_INTEL";
+ options += " -DDEVICE_VENDOR_INTEL";
}
_prog = clCreateProgramWithSource(ctx.getContext(), 1, &ptr, &len, &err);
@@ -246,4 +246,4 @@ size_t cl::CLKernel::getWorkGroupSize()
cl::CLKernel::~CLKernel()
{
clReleaseKernel(_kernel);
-}
\ No newline at end of file
+}
diff --git a/clUtil/clContext.h b/clUtil/clContext.h
index dd8f4ec..4b28027 100644
--- a/clUtil/clContext.h
+++ b/clUtil/clContext.h
@@ -1,5 +1,5 @@
-#ifndef _CL_CONTEXT_H
-#define _CL_CONTEXT_H
+#ifndef CL_CONTEXT_H
+#define CL_CONTEXT_H
#include
#include "clutil.h"
@@ -57,9 +57,6 @@ class CLProgram {
cl_program getProgram();
CLContext& getContext();
-
- std::string getBuildLog();
-
};
@@ -115,7 +112,7 @@ class CLKernel {
clCall(clSetKernelArg(_kernel, 2, sizeof(arg3), &arg3));
clCall(clSetKernelArg(_kernel, 3, sizeof(arg4), &arg4));
clCall(clSetKernelArg(_kernel, 4, sizeof(arg5), &arg5));
- clCall(clSetKernelArg(_kernel, 4, sizeof(arg6), &arg6));
+ clCall(clSetKernelArg(_kernel, 5, sizeof(arg6), &arg6));
}
template
@@ -245,30 +242,6 @@ class CLKernel {
clCall(clSetKernelArg(_kernel, 15, sizeof(T16), &arg16));
}
- template
- void set_args(T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8, T9 arg9, T10 arg10, T11 arg11, T12 arg12,
- T13 arg13, T14 arg14, T15 arg15, T16 arg16)
- {
- clCall(clSetKernelArg(_kernel, 0, sizeof(T1), &arg1));
- clCall(clSetKernelArg(_kernel, 1, sizeof(T2), &arg2));
- clCall(clSetKernelArg(_kernel, 2, sizeof(T3), &arg3));
- clCall(clSetKernelArg(_kernel, 3, sizeof(T4), &arg4));
- clCall(clSetKernelArg(_kernel, 4, sizeof(T5), &arg5));
- clCall(clSetKernelArg(_kernel, 5, sizeof(T6), &arg6));
- clCall(clSetKernelArg(_kernel, 6, sizeof(T7), &arg7));
- clCall(clSetKernelArg(_kernel, 7, sizeof(T8), &arg8));
- clCall(clSetKernelArg(_kernel, 8, sizeof(T9), &arg9));
- clCall(clSetKernelArg(_kernel, 9, sizeof(T10), &arg10));
- clCall(clSetKernelArg(_kernel, 10, sizeof(T11), &arg11));
- clCall(clSetKernelArg(_kernel, 11, sizeof(T12), &arg12));
- clCall(clSetKernelArg(_kernel, 12, sizeof(T13), &arg13));
- clCall(clSetKernelArg(_kernel, 13, sizeof(T14), &arg14));
- clCall(clSetKernelArg(_kernel, 14, sizeof(T15), &arg15));
- clCall(clSetKernelArg(_kernel, 15, sizeof(T16), &arg16));
- }
-
template
@@ -330,4 +303,4 @@ class CLKernel {
}
-#endif
\ No newline at end of file
+#endif
diff --git a/clUtil/clError.cpp b/clUtil/clError.cpp
new file mode 100644
index 0000000..c5e7f57
--- /dev/null
+++ b/clUtil/clError.cpp
@@ -0,0 +1,142 @@
+#include "clutil.h"
+
+std::string cl::getOpenCLErrorName(cl_int errorCode) // Returns the symbolic constant name for the OpenCL status code `errorCode`.
+{
+ switch (errorCode) // Each case returns the spelling of its own constant as a string.
+ {
+ case CL_SUCCESS: return "CL_SUCCESS";
+ case CL_DEVICE_NOT_FOUND: return "CL_DEVICE_NOT_FOUND";
+ case CL_DEVICE_NOT_AVAILABLE: return "CL_DEVICE_NOT_AVAILABLE";
+ case CL_COMPILER_NOT_AVAILABLE: return "CL_COMPILER_NOT_AVAILABLE";
+ case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
+ case CL_OUT_OF_RESOURCES: return "CL_OUT_OF_RESOURCES";
+ case CL_OUT_OF_HOST_MEMORY: return "CL_OUT_OF_HOST_MEMORY";
+ case CL_PROFILING_INFO_NOT_AVAILABLE: return "CL_PROFILING_INFO_NOT_AVAILABLE";
+ case CL_MEM_COPY_OVERLAP: return "CL_MEM_COPY_OVERLAP";
+ case CL_IMAGE_FORMAT_MISMATCH: return "CL_IMAGE_FORMAT_MISMATCH";
+ case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
+ case CL_BUILD_PROGRAM_FAILURE: return "CL_BUILD_PROGRAM_FAILURE";
+ case CL_MAP_FAILURE: return "CL_MAP_FAILURE";
+ case CL_MISALIGNED_SUB_BUFFER_OFFSET: return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
+ case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
+ case CL_COMPILE_PROGRAM_FAILURE: return "CL_COMPILE_PROGRAM_FAILURE";
+ case CL_LINKER_NOT_AVAILABLE: return "CL_LINKER_NOT_AVAILABLE";
+ case CL_LINK_PROGRAM_FAILURE: return "CL_LINK_PROGRAM_FAILURE";
+ case CL_DEVICE_PARTITION_FAILED: return "CL_DEVICE_PARTITION_FAILED";
+ case CL_KERNEL_ARG_INFO_NOT_AVAILABLE: return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE";
+ case CL_INVALID_VALUE: return "CL_INVALID_VALUE";
+ case CL_INVALID_DEVICE_TYPE: return "CL_INVALID_DEVICE_TYPE";
+ case CL_INVALID_PLATFORM: return "CL_INVALID_PLATFORM";
+ case CL_INVALID_DEVICE: return "CL_INVALID_DEVICE";
+ case CL_INVALID_CONTEXT: return "CL_INVALID_CONTEXT";
+ case CL_INVALID_QUEUE_PROPERTIES: return "CL_INVALID_QUEUE_PROPERTIES";
+ case CL_INVALID_COMMAND_QUEUE: return "CL_INVALID_COMMAND_QUEUE";
+ case CL_INVALID_HOST_PTR: return "CL_INVALID_HOST_PTR";
+ case CL_INVALID_MEM_OBJECT: return "CL_INVALID_MEM_OBJECT";
+ case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
+ case CL_INVALID_IMAGE_SIZE: return "CL_INVALID_IMAGE_SIZE";
+ case CL_INVALID_SAMPLER: return "CL_INVALID_SAMPLER";
+ case CL_INVALID_BINARY: return "CL_INVALID_BINARY";
+ case CL_INVALID_BUILD_OPTIONS: return "CL_INVALID_BUILD_OPTIONS";
+ case CL_INVALID_PROGRAM: return "CL_INVALID_PROGRAM";
+ case CL_INVALID_PROGRAM_EXECUTABLE: return "CL_INVALID_PROGRAM_EXECUTABLE";
+ case CL_INVALID_KERNEL_NAME: return "CL_INVALID_KERNEL_NAME";
+ case CL_INVALID_KERNEL_DEFINITION: return "CL_INVALID_KERNEL_DEFINITION";
+ case CL_INVALID_KERNEL: return "CL_INVALID_KERNEL";
+ case CL_INVALID_ARG_INDEX: return "CL_INVALID_ARG_INDEX";
+ case CL_INVALID_ARG_VALUE: return "CL_INVALID_ARG_VALUE";
+ case CL_INVALID_ARG_SIZE: return "CL_INVALID_ARG_SIZE";
+ case CL_INVALID_KERNEL_ARGS: return "CL_INVALID_KERNEL_ARGS";
+ case CL_INVALID_WORK_DIMENSION: return "CL_INVALID_WORK_DIMENSION";
+ case CL_INVALID_WORK_GROUP_SIZE: return "CL_INVALID_WORK_GROUP_SIZE";
+ case CL_INVALID_WORK_ITEM_SIZE: return "CL_INVALID_WORK_ITEM_SIZE";
+ case CL_INVALID_GLOBAL_OFFSET: return "CL_INVALID_GLOBAL_OFFSET";
+ case CL_INVALID_EVENT_WAIT_LIST: return "CL_INVALID_EVENT_WAIT_LIST";
+ case CL_INVALID_EVENT: return "CL_INVALID_EVENT";
+ case CL_INVALID_OPERATION: return "CL_INVALID_OPERATION";
+ case CL_INVALID_GL_OBJECT: return "CL_INVALID_GL_OBJECT";
+ case CL_INVALID_BUFFER_SIZE: return "CL_INVALID_BUFFER_SIZE";
+ case CL_INVALID_MIP_LEVEL: return "CL_INVALID_MIP_LEVEL";
+ case CL_INVALID_GLOBAL_WORK_SIZE: return "CL_INVALID_GLOBAL_WORK_SIZE";
+ case CL_INVALID_PROPERTY: return "CL_INVALID_PROPERTY";
+ case CL_INVALID_IMAGE_DESCRIPTOR: return "CL_INVALID_IMAGE_DESCRIPTOR";
+ case CL_INVALID_COMPILER_OPTIONS: return "CL_INVALID_COMPILER_OPTIONS";
+ case CL_INVALID_LINKER_OPTIONS: return "CL_INVALID_LINKER_OPTIONS";
+ case CL_INVALID_DEVICE_PARTITION_COUNT: return "CL_INVALID_DEVICE_PARTITION_COUNT";
+ case CL_INVALID_PIPE_SIZE: return "CL_INVALID_PIPE_SIZE";
+ case CL_INVALID_DEVICE_QUEUE: return "CL_INVALID_DEVICE_QUEUE";
+ case CL_INVALID_SPEC_ID: return "CL_INVALID_SPEC_ID";
+ case CL_MAX_SIZE_RESTRICTION_EXCEEDED: return "CL_MAX_SIZE_RESTRICTION_EXCEEDED";
+
+ default: // Any code that has no case above gets a generic placeholder name.
+ return "CL_UNKNOWN_ERROR_CODE";
+ }
+}
+
+// from http://www.techdarting.com/2014/01/opencl-errors.html
+std::string cl::getOpenCLErrorDescription(cl_int err) {
+ switch (err) {
+ case CL_SUCCESS: return "Everything is good!";
+ case CL_DEVICE_NOT_FOUND: return "No OpenCL devices that matched given device type were found";
+ case CL_DEVICE_NOT_AVAILABLE: return "No OpenCL compatible device was found";
+ case CL_COMPILER_NOT_AVAILABLE: return "OpenCL Compiler perhaps failed to configure itself, or check your OpenCL installation";
+ case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "Failed to allocate memory for buffer object";
+ case CL_OUT_OF_RESOURCES: return "failure to allocate resources required by the OpenCL implementation on the device";
+ case CL_OUT_OF_HOST_MEMORY: return "failure to allocate resources required by the OpenCL implementation on the host";
+ case CL_PROFILING_INFO_NOT_AVAILABLE: return "returned by clGetEventProfilingInfo, if the CL_QUEUE_PROFILING_ENABLE flag is not set for the command-queue and if the profiling information is currently not available";
+ case CL_MEM_COPY_OVERLAP: return "if source and destination buffers are the same buffer object and the source and destination regions overlap";
+ case CL_IMAGE_FORMAT_MISMATCH: return "src and dst image do not use the same image format";
+ case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "the image format is not supported.";
+ case CL_BUILD_PROGRAM_FAILURE: return "program build error for given device, Use clGetProgramBuildInfo API call to get the build log of the kernel compilation.";
+ case CL_MAP_FAILURE: return "failed to map the requested region into the host address space. This error does not occur for buffer objects created with CL_MEM_USE_HOST_PTR or CL_MEM_ALLOC_HOST_PTR";
+ case CL_MISALIGNED_SUB_BUFFER_OFFSET: return "no devices in given context associated with buffer for which the origin value is aligned to the CL_DEVICE_MEM_BASE_ADDR_ALIGN value";
+ case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: return "returned by clWaitForEvents(), execution status of any of the events in event list is a negative integer value i.e., error";
+ case CL_COMPILE_PROGRAM_FAILURE: return "failed to compile the program source. Error occurs if clCompileProgram does not return until the compile has completed";
+ case CL_LINKER_NOT_AVAILABLE: return "Linker unavailable";
+ case CL_LINK_PROGRAM_FAILURE: return "failed to link the compiled binaries and perhaps libraries";
+ case CL_DEVICE_PARTITION_FAILED: return "given partition name is supported by the implementation but input device couldn't be partitioned further";
+ case CL_KERNEL_ARG_INFO_NOT_AVAILABLE: return "argument information is not available for the given kernel";
+ case CL_INVALID_VALUE: return "values passed in the flags parameter is not valid";
+ case CL_INVALID_DEVICE_TYPE: return "device type specified is not valid, its returned by clCreateContextFromType / clGetDeviceIDs";
+ case CL_INVALID_PLATFORM: return "the specified platform is not a valid platform, its returned by clGetPlatformInfo /clGetDeviceIDs / clCreateContext / clCreateContextFromType";
+ case CL_INVALID_DEVICE: return "device/s specified are not valid";
+ case CL_INVALID_CONTEXT: return "the given context is invalid OpenCL context, or the context associated with certain parameters are not the same";
+ case CL_INVALID_QUEUE_PROPERTIES: return "specified properties are valid but are not supported by the device, its returned by clCreateCommandQueue / clSetCommandQueueProperty";
+ case CL_INVALID_COMMAND_QUEUE: return "the specified command-queue is not a valid command-queue";
+ case CL_INVALID_HOST_PTR: return "host pointer is NULL and CL_MEM_COPY_HOST_PTR or CL_MEM_USE_HOST_PTR are set in flags or if host_ptr is not NULL but CL_MEM_COPY_HOST_PTR or CL_MEM_USE_HOST_PTR are not set in flags. returned by clCreateBuffer / clCreateImage2D / clCreateImage3D";
+ case CL_INVALID_MEM_OBJECT: return "the passed parameter is not a valid memory, image, or buffer object";
+ case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return "image format specified is not valid or is NULL, clCreateImage2D /clCreateImage3D returns this.";
+ case CL_INVALID_IMAGE_SIZE: return "Its returned by create Image functions 2D/3D, if specified image width or height are outbound or 0";
+ case CL_INVALID_SAMPLER: return "specified sampler is an invalid sampler object";
+ case CL_INVALID_BINARY: return "program binary is not a valid binary for the specified device, returned by clBuildProgram / clCreateProgramWithBinary";
+ case CL_INVALID_BUILD_OPTIONS: return "the given build options are not valid";
+ case CL_INVALID_PROGRAM: return "the given program is an invalid program object, returned by clRetainProgram / clReleaseProgram / clBuildProgram / clGetProgramInfo / clGetProgramBuildInfo / clCreateKernel / clCreateKernelsInProgram";
+ case CL_INVALID_PROGRAM_EXECUTABLE: return "if there is no successfully built executable for program returned by clCreateKernel, there is no device in program then returned by clCreateKernelsInProgram, if no successfully built program executable present for device associated with command queue then returned by clEnqueueNDRangeKernel / clEnqueueTask";
+ case CL_INVALID_KERNEL_NAME: return "mentioned kernel name is not found in program";
+ case CL_INVALID_KERNEL_DEFINITION: return "arguments mismatch for the __kernel function definition and the passed ones, returned by clCreateKernel";
+ case CL_INVALID_KERNEL: return "specified kernel is an invalid kernel object";
+ case CL_INVALID_ARG_INDEX: return "clSetKernelArg if an invalid argument index is specified";
+ case CL_INVALID_ARG_VALUE: return "the argument value specified is NULL, returned by clSetKernelArg";
+ case CL_INVALID_ARG_SIZE: return "the given argument size (arg_size) do not match size of the data type for an argument, returned by clSetKernelArg";
+ case CL_INVALID_KERNEL_ARGS: return "the kernel argument values have not been specified, returned by clEnqueueNDRangeKernel / clEnqueueTask";
+ case CL_INVALID_WORK_DIMENSION: return "given work dimension is an invalid value, returned by clEnqueueNDRangeKernel";
+ case CL_INVALID_WORK_GROUP_SIZE: return "the specified local workgroup size and number of workitems specified by global workgroup size is not evenly divisible by local workgroup size";
+ case CL_INVALID_WORK_ITEM_SIZE: return "no. of workitems specified in any of local work group sizes is greater than the corresponding values specified by CL_DEVICE_MAX_WORK_ITEM_SIZES in that particular dimension";
+ case CL_INVALID_GLOBAL_OFFSET: return "global_work_offset is not NULL. Must currently be a NULL value. In a future revision of OpenCL, global_work_offset can be used but not until OCL 1.2";
+ case CL_INVALID_EVENT_WAIT_LIST: return "event wait list is NULL and (no. of events in wait list > 0), or event wait list is not NULL and no. of events in wait list is 0, or specified event objects are not valid events";
+ case CL_INVALID_EVENT: return "invalid event objects specified";
+ case CL_INVALID_GL_OBJECT: return "not a valid GL buffer object";
+ case CL_INVALID_BUFFER_SIZE: return "the value of the parameter size is 0 or exceeds CL_DEVICE_MAX_MEM_ALLOC_SIZE for all devices specified in the parameter context, returned by clCreateBuffer";
+ case CL_INVALID_GLOBAL_WORK_SIZE: return "specified global work size is NULL, or any of the values specified in global work dimensions are 0 or exceeds the range given by the sizeof(size_t) for the device on which the kernel will be enqueued, returned by clEnqueueNDRangeKernel";
+ case CL_INVALID_PROPERTY: return "context property name in properties is not a supported property name, returned by clCreateContext";
+ case CL_INVALID_IMAGE_DESCRIPTOR: return "values specified in image description are invalid";
+ case CL_INVALID_COMPILER_OPTIONS: return "compiler options specified by options are invalid, returned by clCompileProgram";
+ case CL_INVALID_LINKER_OPTIONS: return "linker options specified by options are invalid, returned by clLinkProgram";
+ case CL_INVALID_DEVICE_PARTITION_COUNT: return "partition name specified in properties is CL_DEVICE_PARTITION_BY_COUNTS and the number of sub-devices requested exceeds CL_DEVICE_PARTITION_MAX_SUB_DEVICES or the total number of compute units requested exceeds CL_DEVICE_PARTITION_MAX_COMPUTE_UNITS for in_device, or the number of compute units requested for one or more sub-devices is less than zero or the number of sub-devices requested exceeds CL_DEVICE_PARTITION_MAX_COMPUTE_UNITS for in_device";
+ case CL_INVALID_PIPE_SIZE: return "pipe_packet_size is 0 or the pipe_packet_size exceeds CL_DEVICE_PIPE_MAX_PACKET_SIZE value specified in table 4.3 (see clGetDeviceInfo) for all devices in context or pipe_max_packets is 0";
+ case CL_INVALID_SPEC_ID: return "spec_id is not a valid specialization constant identifier";
+ case CL_MAX_SIZE_RESTRICTION_EXCEEDED: return "the size in bytes of the memory object (if the argument is a memory object) or arg_size (if the argument is declared with local qualifier) exceeds a language- specified maximum size restriction for this argument, such as the MaxByteOffset SPIR-V decoration";
+
+ default: return "No description available";
+ }
+}
diff --git a/clUtil/clUtil.cpp b/clUtil/clUtil.cpp
index 02b3835..0f16152 100644
--- a/clUtil/clUtil.cpp
+++ b/clUtil/clUtil.cpp
@@ -1,6 +1,5 @@
#include "clutil.h"
-
void cl::clCall(cl_int err)
{
if(err != CL_SUCCESS) {
@@ -8,7 +7,6 @@ void cl::clCall(cl_int err)
}
}
-
std::vector cl::getDevices()
{
std::vector deviceList;
@@ -52,6 +50,10 @@ std::vector cl::getDevices()
info.cores = cores;
+ size_t maxWorkingGroupSize = 0;
+ clCall(clGetDeviceInfo(devices[j], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &maxWorkingGroupSize, NULL));
+ info.maxWorkingGroupSize = maxWorkingGroupSize;
+
cl_ulong mem;
clCall(clGetDeviceInfo(devices[j], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(mem), &mem, NULL));
@@ -60,10 +62,10 @@ std::vector cl::getDevices()
deviceList.push_back(info);
}
- delete devices;
+ delete[] devices;
}
- delete platforms;
+ delete[] platforms;
return deviceList;
-}
\ No newline at end of file
+}
diff --git a/clUtil/clUtil.vcxproj b/clUtil/clUtil.vcxproj
index 4a66f61..055f977 100644
--- a/clUtil/clUtil.vcxproj
+++ b/clUtil/clUtil.vcxproj
@@ -5,6 +5,14 @@
Debug
Win32
+
+ Performance Release
+ Win32
+
+
+ Performance Release
+ x64
+
Release
Win32
@@ -28,26 +36,40 @@
StaticLibrary
true
- v141
+ ClangCl
+ MultiByte
+
+
+ StaticLibrary
+ false
+ ClangCl
MultiByte
StaticLibrary
false
- v141
+ ClangCl
true
MultiByte
StaticLibrary
true
- v142
+ ClangCl
MultiByte
+
+ StaticLibrary
+ false
+ ClangCl
+ MultiByte
+ true
+ x64
+
StaticLibrary
false
- v142
+ ClangCL
true
MultiByte
@@ -60,6 +82,10 @@
+
+
+
+
@@ -68,6 +94,10 @@
+
+
+
+
@@ -98,15 +128,40 @@
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.2\include;%(AdditionalIncludeDirectories)
-
+
Level3
Disabled
true
true
+ C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.2\include;%(AdditionalIncludeDirectories)
+
+
+
+
+ EnableAllWarnings
+ Disabled
+ true
+ true
$(OPENCL_INCLUDE);$(SolutionDir)\util;%(AdditionalIncludeDirectories)
+
+
+ Level3
+ true
+ true
+ $(OPENCL_INCLUDE);$(SolutionDir)\util;%(AdditionalIncludeDirectories)
+ None
+ MaxSpeed
+ AnySuitable
+ true
+ Speed
+ true
+ true
+ true
+
+
Level3
@@ -124,7 +179,7 @@
-
+
diff --git a/clUtil/clerrors.cpp b/clUtil/clerrors.cpp
deleted file mode 100644
index 56b1542..0000000
--- a/clUtil/clerrors.cpp
+++ /dev/null
@@ -1,69 +0,0 @@
-#include "clutil.h"
-
-
-std::string cl::getErrorString(cl_int err)
-{
- switch(err) {
- case 0: return "CL_SUCCESS";
- case -1: return "CL_DEVICE_NOT_FOUND";
- case -2: return "CL_DEVICE_NOT_AVAILABLE";
- case -3: return "CL_COMPILER_NOT_AVAILABLE";
- case -4: return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
- case -5: return "CL_OUT_OF_RESOURCES";
- case -6: return "CL_OUT_OF_HOST_MEMORY";
- case -7: return "CL_PROFILING_INFO_NOT_AVAILABLE";
- case -8: return "CL_MEM_COPY_OVERLAP";
- case -9: return "CL_IMAGE_FORMAT_MISMATCH";
- case -10: return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
- case -11: return "CL_BUILD_PROGRAM_FAILURE";
- case -12: return "CL_MAP_FAILURE";
- case -13: return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
- case -14: return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
- case -15: return "CL_COMPILE_PROGRAM_FAILURE";
- case -16: return "CL_LINKER_NOT_AVAILABLE";
- case -17: return "CL_LINK_PROGRAM_FAILURE";
- case -18: return "CL_DEVICE_PARTITION_FAILED";
- case -19: return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE";
- case -30: return "CL_INVALID_VALUE";
- case -31: return "CL_INVALID_DEVICE_TYPE";
- case -32: return "CL_INVALID_PLATFORM";
- case -33: return "CL_INVALID_DEVICE";
- case -34: return "CL_INVALID_CONTEXT";
- case -35: return "CL_INVALID_QUEUE_PROPERTIES";
- case -36: return "CL_INVALID_COMMAND_QUEUE";
- case -37: return "CL_INVALID_HOST_PTR";
- case -38: return "CL_INVALID_MEM_OBJECT";
- case -39: return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
- case -40: return "CL_INVALID_IMAGE_SIZE";
- case -41: return "CL_INVALID_SAMPLER";
- case -42: return "CL_INVALID_BINARY";
- case -43: return "CL_INVALID_BUILD_OPTIONS";
- case -44: return "CL_INVALID_PROGRAM";
- case -45: return "CL_INVALID_PROGRAM_EXECUTABLE";
- case -46: return "CL_INVALID_KERNEL_NAME";
- case -47: return "CL_INVALID_KERNEL_DEFINITION";
- case -48: return "CL_INVALID_KERNEL";
- case -49: return "CL_INVALID_ARG_INDEX";
- case -50: return "CL_INVALID_ARG_VALUE";
- case -51: return "CL_INVALID_ARG_SIZE";
- case -52: return "CL_INVALID_KERNEL_ARGS";
- case -53: return "CL_INVALID_WORK_DIMENSION";
- case -54: return "CL_INVALID_WORK_GROUP_SIZE";
- case -55: return "CL_INVALID_WORK_ITEM_SIZE";
- case -56: return "CL_INVALID_GLOBAL_OFFSET";
- case -57: return "CL_INVALID_EVENT_WAIT_LIST";
- case -58: return "CL_INVALID_EVENT";
- case -59: return "CL_INVALID_OPERATION";
- case -60: return "CL_INVALID_GL_OBJECT";
- case -61: return "CL_INVALID_BUFFER_SIZE";
- case -62: return "CL_INVALID_MIP_LEVEL";
- case -63: return "CL_INVALID_GLOBAL_WORK_SIZE";
- case -64: return "CL_INVALID_PROPERTY";
- case -65: return "CL_INVALID_IMAGE_DESCRIPTOR";
- case -66: return "CL_INVALID_COMPILER_OPTIONS";
- case -67: return "CL_INVALID_LINKER_OPTIONS";
- case -68: return "CL_INVALID_DEVICE_PARTITION_COUNT";
- default: return "CL_UNKNOWN_ERROR";
- }
-
-}
\ No newline at end of file
diff --git a/clUtil/clutil.h b/clUtil/clutil.h
index 3559145..617a2b5 100644
--- a/clUtil/clutil.h
+++ b/clUtil/clutil.h
@@ -1,5 +1,5 @@
-#ifndef _CL_UTIL_H
-#define _CL_UTIL_H
+#ifndef CL_UTIL_H
+#define CL_UTIL_H
#ifdef __APPLE__
#define CL_SILENCE_DEPRECATION
@@ -12,31 +12,42 @@
#include
namespace cl {
- std::string getErrorString(cl_int err);
+ std::string getOpenCLErrorName(cl_int errorCode);
+ std::string getOpenCLErrorDescription(cl_int errorCode);
typedef struct {
cl_device_id id;
int cores;
uint64_t mem;
std::string name;
-
+ size_t maxWorkingGroupSize;
}CLDeviceInfo;
class CLException {
public:
int error;
std::string msg;
+ std::string description;
+
+ CLException(cl_int errorCode) // msg and description are both looked up from the status code
+     : error(errorCode),
+       msg(getOpenCLErrorName(errorCode)),
+       description(getOpenCLErrorDescription(errorCode))
+ {
+ }
- CLException(cl_int err)
+ CLException(cl_int errorCode, std::string pMsg) // caller-supplied msg; description is still looked up
+     : error(errorCode),
+       msg(pMsg),
+       description(getOpenCLErrorDescription(errorCode))
{
- this->error = err;
- this->msg = getErrorString(err);
}
- CLException(cl_int err, std::string msg)
+ CLException(cl_int errorCode, std::string pMsg, std::string pDescription) // caller supplies both texts
+     : error(errorCode),
+       msg(pMsg),
+       description(pDescription)
{
- this->error = err;
- this->msg = msg;
}
};
@@ -44,10 +55,8 @@ namespace cl {
std::vector getDevices();
- int getDeviceCount();
-
void clCall(cl_int err);
}
-#endif
\ No newline at end of file
+#endif
diff --git a/cudaInfo/Makefile b/cudaInfo/Makefile
deleted file mode 100644
index 8c81055..0000000
--- a/cudaInfo/Makefile
+++ /dev/null
@@ -1,9 +0,0 @@
-CPPSRC:=$(wildcard *.cpp)
-
-all:
- ${CXX} -o cudainfo.bin ${CPPSRC} ${INCLUDE} -I${CUDA_INCLUDE} ${CXXFLAGS} ${LIBS} -L${CUDA_LIB} -lcudautil -lcudart
- mkdir -p $(BINDIR)
- cp cudainfo.bin $(BINDIR)/cudainfo
-
-clean:
- rm -rf cudainfo.bin
\ No newline at end of file
diff --git a/cudaInfo/cudaInfo.vcxproj b/cudaInfo/cudaInfo.vcxproj
deleted file mode 100644
index 6636506..0000000
--- a/cudaInfo/cudaInfo.vcxproj
+++ /dev/null
@@ -1,94 +0,0 @@
-
-
-
-
- Debug
- x64
-
-
- Release
- x64
-
-
-
-
-
-
-
- {eadaaa54-e304-4656-8263-e5e688ff323d}
-
-
-
- {9E8ECC85-AF9F-4F17-9397-633CA2FEE94E}
- cudaInfo
- 10.0
-
-
-
- Application
- true
- MultiByte
- v142
-
-
- Application
- false
- true
- MultiByte
- v142
-
-
-
-
-
-
-
-
-
-
-
-
-
- true
-
-
-
- Level3
- Disabled
- WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
- $(SolutionDir)cudaUtil;%(AdditionalIncludeDirectories)
-
-
- true
- Console
- cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
-
-
- 64
-
-
-
-
- Level3
- MaxSpeed
- true
- true
- WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
- $(SolutionDir)cudaUtil;%(AdditionalIncludeDirectories)
-
-
- true
- true
- true
- Console
- cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
-
-
- 64
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/cudaInfo/main.cpp b/cudaInfo/main.cpp
deleted file mode 100644
index 77d1fec..0000000
--- a/cudaInfo/main.cpp
+++ /dev/null
@@ -1,34 +0,0 @@
-#include
-#include
-
-#include"cudaUtil.h"
-
-void printDeviceInfo(const cuda::CudaDeviceInfo &info)
-{
- printf("ID: %d\n", info.id);
- printf("Name: %s\n", info.name.c_str());
- printf("Capability: %d.%d\n", info.major, info.minor);
- printf("MP: %d\n", info.mpCount);
- printf("Cores: %d (%d per MP)\n", info.mpCount * info.cores, info.cores);
- printf("Memory: %dMB\n", (int)(info.mem / (1024 * 1024)));
-}
-
-int main(int argc, char **argv)
-{
- try {
- std::vector devices = cuda::getDevices();
-
- printf("Found %d devices\n\n", (int)devices.size());
-
- for(int i = 0; i < (int)devices.size(); i++) {
- printDeviceInfo(devices[i]);
- printf("\n");
- }
- } catch(cuda::CudaException &ex) {
- printf("Error querying devices: %s\n", ex.msg.c_str());
-
- return 1;
- }
-
- return 0;
-}
\ No newline at end of file
diff --git a/cudaMath/cudaMath.vcxproj b/cudaMath/cudaMath.vcxproj
deleted file mode 100644
index fcf4097..0000000
--- a/cudaMath/cudaMath.vcxproj
+++ /dev/null
@@ -1,90 +0,0 @@
-
-
-
-
- Debug
- x64
-
-
- Release
- x64
-
-
-
- {E1BDB205-8994-4E49-8B35-172A84E7118C}
- cudaMath
- 10.0
-
-
-
- Application
- true
- MultiByte
- v142
-
-
- Application
- false
- true
- MultiByte
- v142
-
-
-
-
-
-
-
-
-
-
-
-
-
- true
-
-
-
- Level3
- Disabled
- WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
-
-
- true
- Console
- cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
-
-
- 64
-
-
-
-
- Level3
- MaxSpeed
- true
- true
- WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
-
-
- true
- true
- true
- Console
- cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
-
-
- 64
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/cudaMath/ptx.cuh b/cudaMath/ptx.cuh
deleted file mode 100644
index 0bbcffc..0000000
--- a/cudaMath/ptx.cuh
+++ /dev/null
@@ -1,32 +0,0 @@
-#ifndef _PTX_H
-#define _PTX_H
-
-#include
-
-#define madc_hi(dest, a, x, b) asm volatile("madc.hi.u32 %0, %1, %2, %3;\n\t" : "=r"(dest) : "r"(a), "r"(x), "r"(b))
-#define madc_hi_cc(dest, a, x, b) asm volatile("madc.hi.cc.u32 %0, %1, %2, %3;\n\t" : "=r"(dest) : "r"(a), "r"(x), "r"(b))
-#define mad_hi_cc(dest, a, x, b) asm volatile("mad.hi.cc.u32 %0, %1, %2, %3;\n\t" : "=r"(dest) : "r"(a), "r"(x), "r"(b))
-
-#define mad_lo_cc(dest, a, x, b) asm volatile("mad.lo.cc.u32 %0, %1, %2, %3;\n\t" : "=r"(dest) : "r"(a), "r"(x), "r"(b))
-#define madc_lo(dest, a, x, b) asm volatile("madc.lo.u32 %0, %1, %2, %3;\n\t" : "=r"(dest) : "r"(a), "r"(x), "r"(b))
-#define madc_lo_cc(dest, a, x, b) asm volatile("madc.lo.cc.u32 %0, %1, %2, %3;\n\t" : "=r"(dest) : "r"(a), "r"(x),"r"(b))
-
-#define addc(dest, a, b) asm volatile("addc.u32 %0, %1, %2;\n\t" : "=r"(dest) : "r"(a), "r"(b))
-#define add_cc(dest, a, b) asm volatile("add.cc.u32 %0, %1, %2;\n\t" : "=r"(dest) : "r"(a), "r"(b))
-#define addc_cc(dest, a, b) asm volatile("addc.cc.u32 %0, %1, %2;\n\t" : "=r"(dest) : "r"(a), "r"(b))
-
-#define sub_cc(dest, a, b) asm volatile("sub.cc.u32 %0, %1, %2;\n\t" : "=r"(dest) : "r"(a), "r"(b))
-#define subc_cc(dest, a, b) asm volatile("subc.cc.u32 %0, %1, %2;\n\t" : "=r"(dest) : "r"(a), "r"(b))
-#define subc(dest, a, b) asm volatile("subc.u32 %0, %1, %2;\n\t" : "=r"(dest) : "r"(a), "r"(b))
-
-#define set_eq(dest,a,b) asm volatile("set.eq.u32.u32 %0, %1, %2;\n\t" : "=r"(dest) : "r"(a), "r"(b))
-
-#define lsbpos(x) (__ffs((x)))
-
-
-__device__ __forceinline__ unsigned int endian(unsigned int x)
-{
- return (x << 24) | ((x << 8) & 0x00ff0000) | ((x >> 8) & 0x0000ff00) | (x >> 24);
-}
-
-#endif
\ No newline at end of file
diff --git a/cudaMath/ripemd160.cuh b/cudaMath/ripemd160.cuh
deleted file mode 100644
index 47590bb..0000000
--- a/cudaMath/ripemd160.cuh
+++ /dev/null
@@ -1,539 +0,0 @@
-#ifndef _RIPEMD160_CUH
-#define _RIPEMD160_CUH
-
-#include
-#include
-#include
-#include "ptx.cuh"
-
-__constant__ unsigned int _RIPEMD160_IV[5] = {
- 0x67452301,
- 0xefcdab89,
- 0x98badcfe,
- 0x10325476,
- 0xc3d2e1f0
-};
-
-__constant__ unsigned int _K0 = 0x5a827999;
-__constant__ unsigned int _K1 = 0x6ed9eba1;
-__constant__ unsigned int _K2 = 0x8f1bbcdc;
-__constant__ unsigned int _K3 = 0xa953fd4e;
-
-__constant__ unsigned int _K4 = 0x7a6d76e9;
-__constant__ unsigned int _K5 = 0x6d703ef3;
-__constant__ unsigned int _K6 = 0x5c4dd124;
-__constant__ unsigned int _K7 = 0x50a28be6;
-
-
-__device__ __forceinline__ unsigned int rotl(unsigned int x, int n)
-{
- return (x << n) | (x >> (32 - n));
-}
-
-__device__ __forceinline__ unsigned int F(unsigned int x, unsigned int y, unsigned int z)
-{
- return x ^ y ^ z;
-}
-
-__device__ __forceinline__ unsigned int G(unsigned int x, unsigned int y, unsigned int z)
-{
- return (((x) & (y)) | (~(x) & (z)));
-}
-
-__device__ __forceinline__ unsigned int H(unsigned int x, unsigned int y, unsigned int z)
-{
- return (((x) | ~(y)) ^ (z));
-}
-
-__device__ __forceinline__ unsigned int I(unsigned int x, unsigned int y, unsigned int z)
-{
- return (((x) & (z)) | ((y) & ~(z)));
-}
-
-__device__ __forceinline__ unsigned int J(unsigned int x, unsigned int y, unsigned int z)
-{
- return ((x) ^ ((y) | ~(z)));
-}
-
-__device__ __forceinline__ void FF(unsigned int &a, unsigned int &b, unsigned int &c, unsigned int &d, unsigned int &e, unsigned int x, unsigned int s)
-{
- a += F(b, c, d) + x;
- a = rotl(a, s) + e;
- c = rotl(c, 10);
-}
-
-__device__ __forceinline__ void GG(unsigned int &a, unsigned int &b, unsigned int &c, unsigned int &d, unsigned int &e, unsigned int x, unsigned int s)
-{
- a += G(b, c, d) + x + _K0;
- a = rotl(a, s) + e;
- c = rotl(c, 10);
-}
-
-__device__ __forceinline__ void HH(unsigned int &a, unsigned int &b, unsigned int &c, unsigned int &d, unsigned int &e, unsigned int x, unsigned int s)
-{
- a += H(b, c, d) + x + _K1;
- a = rotl(a, s) + e;
- c = rotl(c, 10);
-}
-
-__device__ __forceinline__ void II(unsigned int &a, unsigned int &b, unsigned int &c, unsigned int &d, unsigned int &e, unsigned int x, unsigned int s)
-{
- a += I(b, c, d) + x + _K2;
- a = rotl(a, s) + e;
- c = rotl(c, 10);
-}
-
-__device__ __forceinline__ void JJ(unsigned int &a, unsigned int &b, unsigned int &c, unsigned int &d, unsigned int &e, unsigned int x, unsigned int s)
-{
- a += J(b, c, d) + x + _K3;
- a = rotl(a, s) + e;
- c = rotl(c, 10);
-}
-
-__device__ __forceinline__ void FFF(unsigned int &a, unsigned int &b, unsigned int &c, unsigned int &d, unsigned int &e, unsigned int x, unsigned int s)
-{
- a += F(b, c, d) + x;
- a = rotl(a, s) + e;
- c = rotl(c, 10);
-}
-
-__device__ __forceinline__ void GGG(unsigned int &a, unsigned int &b, unsigned int &c, unsigned int &d, unsigned int &e, unsigned int x, unsigned int s)
-{
- a += G(b, c, d) + x + _K4;
- a = rotl(a, s) + e;
- c = rotl(c, 10);
-}
-
-__device__ __forceinline__ void HHH(unsigned int &a, unsigned int &b, unsigned int &c, unsigned int &d, unsigned int &e, unsigned int x, unsigned int s)
-{
- a += H(b, c, d) + x + _K5;
- a = rotl(a, s) + e;
- c = rotl(c, 10);
-}
-
-__device__ __forceinline__ void III(unsigned int &a, unsigned int &b, unsigned int &c, unsigned int &d, unsigned int &e, unsigned int x, unsigned int s)
-{
- a += I(b, c, d) + x + _K6;
- a = rotl(a, s) + e;
- c = rotl(c, 10);
-}
-
-__device__ __forceinline__ void JJJ(unsigned int &a, unsigned int &b, unsigned int &c, unsigned int &d, unsigned int &e, unsigned int x, unsigned int s)
-{
- a += J(b, c, d) + x + _K7;
- a = rotl(a, s) + e;
- c = rotl(c, 10);
-}
-
-
-
-__device__ void ripemd160sha256(const unsigned int x[8], unsigned int digest[5])
-{
- unsigned int a1 = _RIPEMD160_IV[0];
- unsigned int b1 = _RIPEMD160_IV[1];
- unsigned int c1 = _RIPEMD160_IV[2];
- unsigned int d1 = _RIPEMD160_IV[3];
- unsigned int e1 = _RIPEMD160_IV[4];
-
- const unsigned int x8 = 0x00000080;
- const unsigned int x14 = 256;
-
- /* round 1 */
- FF(a1, b1, c1, d1, e1, x[0], 11);
- FF(e1, a1, b1, c1, d1, x[1], 14);
- FF(d1, e1, a1, b1, c1, x[2], 15);
- FF(c1, d1, e1, a1, b1, x[3], 12);
- FF(b1, c1, d1, e1, a1, x[4], 5);
- FF(a1, b1, c1, d1, e1, x[5], 8);
- FF(e1, a1, b1, c1, d1, x[6], 7);
- FF(d1, e1, a1, b1, c1, x[7], 9);
- FF(c1, d1, e1, a1, b1, x8, 11);
- FF(b1, c1, d1, e1, a1, 0, 13);
- FF(a1, b1, c1, d1, e1, 0, 14);
- FF(e1, a1, b1, c1, d1, 0, 15);
- FF(d1, e1, a1, b1, c1, 0, 6);
- FF(c1, d1, e1, a1, b1, 0, 7);
- FF(b1, c1, d1, e1, a1, x14, 9);
- FF(a1, b1, c1, d1, e1, 0, 8);
-
- /* round 2 */
- GG(e1, a1, b1, c1, d1, x[7], 7);
- GG(d1, e1, a1, b1, c1, x[4], 6);
- GG(c1, d1, e1, a1, b1, 0, 8);
- GG(b1, c1, d1, e1, a1, x[1], 13);
- GG(a1, b1, c1, d1, e1, 0, 11);
- GG(e1, a1, b1, c1, d1, x[6], 9);
- GG(d1, e1, a1, b1, c1, 0, 7);
- GG(c1, d1, e1, a1, b1, x[3], 15);
- GG(b1, c1, d1, e1, a1, 0, 7);
- GG(a1, b1, c1, d1, e1, x[0], 12);
- GG(e1, a1, b1, c1, d1, 0, 15);
- GG(d1, e1, a1, b1, c1, x[5], 9);
- GG(c1, d1, e1, a1, b1, x[2], 11);
- GG(b1, c1, d1, e1, a1, x14, 7);
- GG(a1, b1, c1, d1, e1, 0, 13);
- GG(e1, a1, b1, c1, d1, x8, 12);
-
- /* round 3 */
- HH(d1, e1, a1, b1, c1, x[3], 11);
- HH(c1, d1, e1, a1, b1, 0, 13);
- HH(b1, c1, d1, e1, a1, x14, 6);
- HH(a1, b1, c1, d1, e1, x[4], 7);
- HH(e1, a1, b1, c1, d1, 0, 14);
- HH(d1, e1, a1, b1, c1, 0, 9);
- HH(c1, d1, e1, a1, b1, x8, 13);
- HH(b1, c1, d1, e1, a1, x[1], 15);
- HH(a1, b1, c1, d1, e1, x[2], 14);
- HH(e1, a1, b1, c1, d1, x[7], 8);
- HH(d1, e1, a1, b1, c1, x[0], 13);
- HH(c1, d1, e1, a1, b1, x[6], 6);
- HH(b1, c1, d1, e1, a1, 0, 5);
- HH(a1, b1, c1, d1, e1, 0, 12);
- HH(e1, a1, b1, c1, d1, x[5], 7);
- HH(d1, e1, a1, b1, c1, 0, 5);
-
- /* round 4 */
- II(c1, d1, e1, a1, b1, x[1], 11);
- II(b1, c1, d1, e1, a1, 0, 12);
- II(a1, b1, c1, d1, e1, 0, 14);
- II(e1, a1, b1, c1, d1, 0, 15);
- II(d1, e1, a1, b1, c1, x[0], 14);
- II(c1, d1, e1, a1, b1, x8, 15);
- II(b1, c1, d1, e1, a1, 0, 9);
- II(a1, b1, c1, d1, e1, x[4], 8);
- II(e1, a1, b1, c1, d1, 0, 9);
- II(d1, e1, a1, b1, c1, x[3], 14);
- II(c1, d1, e1, a1, b1, x[7], 5);
- II(b1, c1, d1, e1, a1, 0, 6);
- II(a1, b1, c1, d1, e1, x14, 8);
- II(e1, a1, b1, c1, d1, x[5], 6);
- II(d1, e1, a1, b1, c1, x[6], 5);
- II(c1, d1, e1, a1, b1, x[2], 12);
-
- /* round 5 */
- JJ(b1, c1, d1, e1, a1, x[4], 9);
- JJ(a1, b1, c1, d1, e1, x[0], 15);
- JJ(e1, a1, b1, c1, d1, x[5], 5);
- JJ(d1, e1, a1, b1, c1, 0, 11);
- JJ(c1, d1, e1, a1, b1, x[7], 6);
- JJ(b1, c1, d1, e1, a1, 0, 8);
- JJ(a1, b1, c1, d1, e1, x[2], 13);
- JJ(e1, a1, b1, c1, d1, 0, 12);
- JJ(d1, e1, a1, b1, c1, x14, 5);
- JJ(c1, d1, e1, a1, b1, x[1], 12);
- JJ(b1, c1, d1, e1, a1, x[3], 13);
- JJ(a1, b1, c1, d1, e1, x8, 14);
- JJ(e1, a1, b1, c1, d1, 0, 11);
- JJ(d1, e1, a1, b1, c1, x[6], 8);
- JJ(c1, d1, e1, a1, b1, 0, 5);
- JJ(b1, c1, d1, e1, a1, 0, 6);
-
- unsigned int a2 = _RIPEMD160_IV[0];
- unsigned int b2 = _RIPEMD160_IV[1];
- unsigned int c2 = _RIPEMD160_IV[2];
- unsigned int d2 = _RIPEMD160_IV[3];
- unsigned int e2 = _RIPEMD160_IV[4];
-
- /* parallel round 1 */
- JJJ(a2, b2, c2, d2, e2, x[5], 8);
- JJJ(e2, a2, b2, c2, d2, x14, 9);
- JJJ(d2, e2, a2, b2, c2, x[7], 9);
- JJJ(c2, d2, e2, a2, b2, x[0], 11);
- JJJ(b2, c2, d2, e2, a2, 0, 13);
- JJJ(a2, b2, c2, d2, e2, x[2], 15);
- JJJ(e2, a2, b2, c2, d2, 0, 15);
- JJJ(d2, e2, a2, b2, c2, x[4], 5);
- JJJ(c2, d2, e2, a2, b2, 0, 7);
- JJJ(b2, c2, d2, e2, a2, x[6], 7);
- JJJ(a2, b2, c2, d2, e2, 0, 8);
- JJJ(e2, a2, b2, c2, d2, x8, 11);
- JJJ(d2, e2, a2, b2, c2, x[1], 14);
- JJJ(c2, d2, e2, a2, b2, 0, 14);
- JJJ(b2, c2, d2, e2, a2, x[3], 12);
- JJJ(a2, b2, c2, d2, e2, 0, 6);
-
- /* parallel round 2 */
- III(e2, a2, b2, c2, d2, x[6], 9);
- III(d2, e2, a2, b2, c2, 0, 13);
- III(c2, d2, e2, a2, b2, x[3], 15);
- III(b2, c2, d2, e2, a2, x[7], 7);
- III(a2, b2, c2, d2, e2, x[0], 12);
- III(e2, a2, b2, c2, d2, 0, 8);
- III(d2, e2, a2, b2, c2, x[5], 9);
- III(c2, d2, e2, a2, b2, 0, 11);
- III(b2, c2, d2, e2, a2, x14, 7);
- III(a2, b2, c2, d2, e2, 0, 7);
- III(e2, a2, b2, c2, d2, x8, 12);
- III(d2, e2, a2, b2, c2, 0, 7);
- III(c2, d2, e2, a2, b2, x[4], 6);
- III(b2, c2, d2, e2, a2, 0, 15);
- III(a2, b2, c2, d2, e2, x[1], 13);
- III(e2, a2, b2, c2, d2, x[2], 11);
-
- /* parallel round 3 */
- HHH(d2, e2, a2, b2, c2, 0, 9);
- HHH(c2, d2, e2, a2, b2, x[5], 7);
- HHH(b2, c2, d2, e2, a2, x[1], 15);
- HHH(a2, b2, c2, d2, e2, x[3], 11);
- HHH(e2, a2, b2, c2, d2, x[7], 8);
- HHH(d2, e2, a2, b2, c2, x14, 6);
- HHH(c2, d2, e2, a2, b2, x[6], 6);
- HHH(b2, c2, d2, e2, a2, 0, 14);
- HHH(a2, b2, c2, d2, e2, 0, 12);
- HHH(e2, a2, b2, c2, d2, x8, 13);
- HHH(d2, e2, a2, b2, c2, 0, 5);
- HHH(c2, d2, e2, a2, b2, x[2], 14);
- HHH(b2, c2, d2, e2, a2, 0, 13);
- HHH(a2, b2, c2, d2, e2, x[0], 13);
- HHH(e2, a2, b2, c2, d2, x[4], 7);
- HHH(d2, e2, a2, b2, c2, 0, 5);
-
- /* parallel round 4 */
- GGG(c2, d2, e2, a2, b2, x8, 15);
- GGG(b2, c2, d2, e2, a2, x[6], 5);
- GGG(a2, b2, c2, d2, e2, x[4], 8);
- GGG(e2, a2, b2, c2, d2, x[1], 11);
- GGG(d2, e2, a2, b2, c2, x[3], 14);
- GGG(c2, d2, e2, a2, b2, 0, 14);
- GGG(b2, c2, d2, e2, a2, 0, 6);
- GGG(a2, b2, c2, d2, e2, x[0], 14);
- GGG(e2, a2, b2, c2, d2, x[5], 6);
- GGG(d2, e2, a2, b2, c2, 0, 9);
- GGG(c2, d2, e2, a2, b2, x[2], 12);
- GGG(b2, c2, d2, e2, a2, 0, 9);
- GGG(a2, b2, c2, d2, e2, 0, 12);
- GGG(e2, a2, b2, c2, d2, x[7], 5);
- GGG(d2, e2, a2, b2, c2, 0, 15);
- GGG(c2, d2, e2, a2, b2, x14, 8);
-
- /* parallel round 5 */
- FFF(b2, c2, d2, e2, a2, 0, 8);
- FFF(a2, b2, c2, d2, e2, 0, 5);
- FFF(e2, a2, b2, c2, d2, 0, 12);
- FFF(d2, e2, a2, b2, c2, x[4], 9);
- FFF(c2, d2, e2, a2, b2, x[1], 12);
- FFF(b2, c2, d2, e2, a2, x[5], 5);
- FFF(a2, b2, c2, d2, e2, x8, 14);
- FFF(e2, a2, b2, c2, d2, x[7], 6);
- FFF(d2, e2, a2, b2, c2, x[6], 8);
- FFF(c2, d2, e2, a2, b2, x[2], 13);
- FFF(b2, c2, d2, e2, a2, 0, 6);
- FFF(a2, b2, c2, d2, e2, x14, 5);
- FFF(e2, a2, b2, c2, d2, x[0], 15);
- FFF(d2, e2, a2, b2, c2, x[3], 13);
- FFF(c2, d2, e2, a2, b2, 0, 11);
- FFF(b2, c2, d2, e2, a2, 0, 11);
-
- digest[0] = _RIPEMD160_IV[1] + c1 + d2;
- digest[1] = _RIPEMD160_IV[2] + d1 + e2;
- digest[2] = _RIPEMD160_IV[3] + e1 + a2;
- digest[3] = _RIPEMD160_IV[4] + a1 + b2;
- digest[4] = _RIPEMD160_IV[0] + b1 + c2;
-}
-
-
-
-__device__ void ripemd160sha256NoFinal(const unsigned int x[8], unsigned int digest[5])
-{
- unsigned int a1 = _RIPEMD160_IV[0];
- unsigned int b1 = _RIPEMD160_IV[1];
- unsigned int c1 = _RIPEMD160_IV[2];
- unsigned int d1 = _RIPEMD160_IV[3];
- unsigned int e1 = _RIPEMD160_IV[4];
-
- const unsigned int x8 = 0x00000080;
- const unsigned int x14 = 256;
-
- /* round 1 */
- FF(a1, b1, c1, d1, e1, x[0], 11);
- FF(e1, a1, b1, c1, d1, x[1], 14);
- FF(d1, e1, a1, b1, c1, x[2], 15);
- FF(c1, d1, e1, a1, b1, x[3], 12);
- FF(b1, c1, d1, e1, a1, x[4], 5);
- FF(a1, b1, c1, d1, e1, x[5], 8);
- FF(e1, a1, b1, c1, d1, x[6], 7);
- FF(d1, e1, a1, b1, c1, x[7], 9);
- FF(c1, d1, e1, a1, b1, x8, 11);
- FF(b1, c1, d1, e1, a1, 0, 13);
- FF(a1, b1, c1, d1, e1, 0, 14);
- FF(e1, a1, b1, c1, d1, 0, 15);
- FF(d1, e1, a1, b1, c1, 0, 6);
- FF(c1, d1, e1, a1, b1, 0, 7);
- FF(b1, c1, d1, e1, a1, x14, 9);
- FF(a1, b1, c1, d1, e1, 0, 8);
-
- /* round 2 */
- GG(e1, a1, b1, c1, d1, x[7], 7);
- GG(d1, e1, a1, b1, c1, x[4], 6);
- GG(c1, d1, e1, a1, b1, 0, 8);
- GG(b1, c1, d1, e1, a1, x[1], 13);
- GG(a1, b1, c1, d1, e1, 0, 11);
- GG(e1, a1, b1, c1, d1, x[6], 9);
- GG(d1, e1, a1, b1, c1, 0, 7);
- GG(c1, d1, e1, a1, b1, x[3], 15);
- GG(b1, c1, d1, e1, a1, 0, 7);
- GG(a1, b1, c1, d1, e1, x[0], 12);
- GG(e1, a1, b1, c1, d1, 0, 15);
- GG(d1, e1, a1, b1, c1, x[5], 9);
- GG(c1, d1, e1, a1, b1, x[2], 11);
- GG(b1, c1, d1, e1, a1, x14, 7);
- GG(a1, b1, c1, d1, e1, 0, 13);
- GG(e1, a1, b1, c1, d1, x8, 12);
-
- /* round 3 */
- HH(d1, e1, a1, b1, c1, x[3], 11);
- HH(c1, d1, e1, a1, b1, 0, 13);
- HH(b1, c1, d1, e1, a1, x14, 6);
- HH(a1, b1, c1, d1, e1, x[4], 7);
- HH(e1, a1, b1, c1, d1, 0, 14);
- HH(d1, e1, a1, b1, c1, 0, 9);
- HH(c1, d1, e1, a1, b1, x8, 13);
- HH(b1, c1, d1, e1, a1, x[1], 15);
- HH(a1, b1, c1, d1, e1, x[2], 14);
- HH(e1, a1, b1, c1, d1, x[7], 8);
- HH(d1, e1, a1, b1, c1, x[0], 13);
- HH(c1, d1, e1, a1, b1, x[6], 6);
- HH(b1, c1, d1, e1, a1, 0, 5);
- HH(a1, b1, c1, d1, e1, 0, 12);
- HH(e1, a1, b1, c1, d1, x[5], 7);
- HH(d1, e1, a1, b1, c1, 0, 5);
-
- /* round 4 */
- II(c1, d1, e1, a1, b1, x[1], 11);
- II(b1, c1, d1, e1, a1, 0, 12);
- II(a1, b1, c1, d1, e1, 0, 14);
- II(e1, a1, b1, c1, d1, 0, 15);
- II(d1, e1, a1, b1, c1, x[0], 14);
- II(c1, d1, e1, a1, b1, x8, 15);
- II(b1, c1, d1, e1, a1, 0, 9);
- II(a1, b1, c1, d1, e1, x[4], 8);
- II(e1, a1, b1, c1, d1, 0, 9);
- II(d1, e1, a1, b1, c1, x[3], 14);
- II(c1, d1, e1, a1, b1, x[7], 5);
- II(b1, c1, d1, e1, a1, 0, 6);
- II(a1, b1, c1, d1, e1, x14, 8);
- II(e1, a1, b1, c1, d1, x[5], 6);
- II(d1, e1, a1, b1, c1, x[6], 5);
- II(c1, d1, e1, a1, b1, x[2], 12);
-
- /* round 5 */
- JJ(b1, c1, d1, e1, a1, x[4], 9);
- JJ(a1, b1, c1, d1, e1, x[0], 15);
- JJ(e1, a1, b1, c1, d1, x[5], 5);
- JJ(d1, e1, a1, b1, c1, 0, 11);
- JJ(c1, d1, e1, a1, b1, x[7], 6);
- JJ(b1, c1, d1, e1, a1, 0, 8);
- JJ(a1, b1, c1, d1, e1, x[2], 13);
- JJ(e1, a1, b1, c1, d1, 0, 12);
- JJ(d1, e1, a1, b1, c1, x14, 5);
- JJ(c1, d1, e1, a1, b1, x[1], 12);
- JJ(b1, c1, d1, e1, a1, x[3], 13);
- JJ(a1, b1, c1, d1, e1, x8, 14);
- JJ(e1, a1, b1, c1, d1, 0, 11);
- JJ(d1, e1, a1, b1, c1, x[6], 8);
- JJ(c1, d1, e1, a1, b1, 0, 5);
- JJ(b1, c1, d1, e1, a1, 0, 6);
-
- unsigned int a2 = _RIPEMD160_IV[0];
- unsigned int b2 = _RIPEMD160_IV[1];
- unsigned int c2 = _RIPEMD160_IV[2];
- unsigned int d2 = _RIPEMD160_IV[3];
- unsigned int e2 = _RIPEMD160_IV[4];
-
- /* parallel round 1 */
- JJJ(a2, b2, c2, d2, e2, x[5], 8);
- JJJ(e2, a2, b2, c2, d2, x14, 9);
- JJJ(d2, e2, a2, b2, c2, x[7], 9);
- JJJ(c2, d2, e2, a2, b2, x[0], 11);
- JJJ(b2, c2, d2, e2, a2, 0, 13);
- JJJ(a2, b2, c2, d2, e2, x[2], 15);
- JJJ(e2, a2, b2, c2, d2, 0, 15);
- JJJ(d2, e2, a2, b2, c2, x[4], 5);
- JJJ(c2, d2, e2, a2, b2, 0, 7);
- JJJ(b2, c2, d2, e2, a2, x[6], 7);
- JJJ(a2, b2, c2, d2, e2, 0, 8);
- JJJ(e2, a2, b2, c2, d2, x8, 11);
- JJJ(d2, e2, a2, b2, c2, x[1], 14);
- JJJ(c2, d2, e2, a2, b2, 0, 14);
- JJJ(b2, c2, d2, e2, a2, x[3], 12);
- JJJ(a2, b2, c2, d2, e2, 0, 6);
-
- /* parallel round 2 */
- III(e2, a2, b2, c2, d2, x[6], 9);
- III(d2, e2, a2, b2, c2, 0, 13);
- III(c2, d2, e2, a2, b2, x[3], 15);
- III(b2, c2, d2, e2, a2, x[7], 7);
- III(a2, b2, c2, d2, e2, x[0], 12);
- III(e2, a2, b2, c2, d2, 0, 8);
- III(d2, e2, a2, b2, c2, x[5], 9);
- III(c2, d2, e2, a2, b2, 0, 11);
- III(b2, c2, d2, e2, a2, x14, 7);
- III(a2, b2, c2, d2, e2, 0, 7);
- III(e2, a2, b2, c2, d2, x8, 12);
- III(d2, e2, a2, b2, c2, 0, 7);
- III(c2, d2, e2, a2, b2, x[4], 6);
- III(b2, c2, d2, e2, a2, 0, 15);
- III(a2, b2, c2, d2, e2, x[1], 13);
- III(e2, a2, b2, c2, d2, x[2], 11);
-
- /* parallel round 3 */
- HHH(d2, e2, a2, b2, c2, 0, 9);
- HHH(c2, d2, e2, a2, b2, x[5], 7);
- HHH(b2, c2, d2, e2, a2, x[1], 15);
- HHH(a2, b2, c2, d2, e2, x[3], 11);
- HHH(e2, a2, b2, c2, d2, x[7], 8);
- HHH(d2, e2, a2, b2, c2, x14, 6);
- HHH(c2, d2, e2, a2, b2, x[6], 6);
- HHH(b2, c2, d2, e2, a2, 0, 14);
- HHH(a2, b2, c2, d2, e2, 0, 12);
- HHH(e2, a2, b2, c2, d2, x8, 13);
- HHH(d2, e2, a2, b2, c2, 0, 5);
- HHH(c2, d2, e2, a2, b2, x[2], 14);
- HHH(b2, c2, d2, e2, a2, 0, 13);
- HHH(a2, b2, c2, d2, e2, x[0], 13);
- HHH(e2, a2, b2, c2, d2, x[4], 7);
- HHH(d2, e2, a2, b2, c2, 0, 5);
-
- /* parallel round 4 */
- GGG(c2, d2, e2, a2, b2, x8, 15);
- GGG(b2, c2, d2, e2, a2, x[6], 5);
- GGG(a2, b2, c2, d2, e2, x[4], 8);
- GGG(e2, a2, b2, c2, d2, x[1], 11);
- GGG(d2, e2, a2, b2, c2, x[3], 14);
- GGG(c2, d2, e2, a2, b2, 0, 14);
- GGG(b2, c2, d2, e2, a2, 0, 6);
- GGG(a2, b2, c2, d2, e2, x[0], 14);
- GGG(e2, a2, b2, c2, d2, x[5], 6);
- GGG(d2, e2, a2, b2, c2, 0, 9);
- GGG(c2, d2, e2, a2, b2, x[2], 12);
- GGG(b2, c2, d2, e2, a2, 0, 9);
- GGG(a2, b2, c2, d2, e2, 0, 12);
- GGG(e2, a2, b2, c2, d2, x[7], 5);
- GGG(d2, e2, a2, b2, c2, 0, 15);
- GGG(c2, d2, e2, a2, b2, x14, 8);
-
- /* parallel round 5 */
- FFF(b2, c2, d2, e2, a2, 0, 8);
- FFF(a2, b2, c2, d2, e2, 0, 5);
- FFF(e2, a2, b2, c2, d2, 0, 12);
- FFF(d2, e2, a2, b2, c2, x[4], 9);
- FFF(c2, d2, e2, a2, b2, x[1], 12);
- FFF(b2, c2, d2, e2, a2, x[5], 5);
- FFF(a2, b2, c2, d2, e2, x8, 14);
- FFF(e2, a2, b2, c2, d2, x[7], 6);
- FFF(d2, e2, a2, b2, c2, x[6], 8);
- FFF(c2, d2, e2, a2, b2, x[2], 13);
- FFF(b2, c2, d2, e2, a2, 0, 6);
- FFF(a2, b2, c2, d2, e2, x14, 5);
- FFF(e2, a2, b2, c2, d2, x[0], 15);
- FFF(d2, e2, a2, b2, c2, x[3], 13);
- FFF(c2, d2, e2, a2, b2, 0, 11);
- FFF(b2, c2, d2, e2, a2, 0, 11);
-
- digest[0] = c1 + d2;
- digest[1] = d1 + e2;
- digest[2] = e1 + a2;
- digest[3] = a1 + b2;
- digest[4] = b1 + c2;
-}
-#endif
\ No newline at end of file
diff --git a/cudaMath/secp256k1.cuh b/cudaMath/secp256k1.cuh
deleted file mode 100644
index 88a3fed..0000000
--- a/cudaMath/secp256k1.cuh
+++ /dev/null
@@ -1,802 +0,0 @@
-#ifndef _SECP256K1_CUH
-#define _SECP256K1_CUH
-
-#include
-#include
-
-#include "ptx.cuh"
-
-
-/**
- Prime modulus 2^256 - 2^32 - 977
- */
-__constant__ static unsigned int _P[8] = {
- 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFC2F
-};
-
-/**
- Base point X
- */
-__constant__ static unsigned int _GX[8] = {
- 0x79BE667E, 0xF9DCBBAC, 0x55A06295, 0xCE870B07, 0x029BFCDB, 0x2DCE28D9, 0x59F2815B, 0x16F81798
-};
-
-
-/**
- Base point Y
- */
-__constant__ static unsigned int _GY[8] = {
- 0x483ADA77, 0x26A3C465, 0x5DA4FBFC, 0x0E1108A8, 0xFD17B448, 0xA6855419, 0x9C47D08F, 0xFB10D4B8
-};
-
-
-/**
- * Group order
- */
-__constant__ static unsigned int _N[8] = {
- 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xBAAEDCE6, 0xAF48A03B, 0xBFD25E8C, 0xD0364141
-};
-
-__constant__ static unsigned int _BETA[8] = {
- 0x7AE96A2B, 0x657C0710, 0x6E64479E, 0xAC3434E9, 0x9CF04975, 0x12F58995, 0xC1396C28, 0x719501EE
-};
-
-
-__constant__ static unsigned int _LAMBDA[8] = {
- 0x5363AD4C, 0xC05C30E0, 0xA5261C02, 0x8812645A, 0x122E22EA, 0x20816678, 0xDF02967C, 0x1B23BD72
-};
-
-
-__device__ __forceinline__ bool isInfinity(const unsigned int x[8])
-{
- bool isf = true;
-
- for(int i = 0; i < 8; i++) {
- if(x[i] != 0xffffffff) {
- isf = false;
- }
- }
-
- return isf;
-}
-
-__device__ __forceinline__ static void copyBigInt(const unsigned int src[8], unsigned int dest[8])
-{
- for(int i = 0; i < 8; i++) {
- dest[i] = src[i];
- }
-}
-
-__device__ static bool equal(const unsigned int *a, const unsigned int *b)
-{
- bool eq = true;
-
- for(int i = 0; i < 8; i++) {
- eq &= (a[i] == b[i]);
- }
-
- return eq;
-}
-
-/**
- * Reads an 8-word big integer from device memory
- */
-__device__ static void readInt(const unsigned int *ara, int idx, unsigned int x[8])
-{
- int totalThreads = gridDim.x * blockDim.x;
-
- int base = idx * totalThreads * 8;
-
- int threadId = blockDim.x * blockIdx.x + threadIdx.x;
-
- int index = base + threadId;
-
- for (int i = 0; i < 8; i++) {
- x[i] = ara[index];
- index += totalThreads;
- }
-}
-
-__device__ static unsigned int readIntLSW(const unsigned int *ara, int idx)
-{
- int totalThreads = gridDim.x * blockDim.x;
-
- int base = idx * totalThreads * 8;
-
- int threadId = blockDim.x * blockIdx.x + threadIdx.x;
-
- int index = base + threadId;
-
- return ara[index + totalThreads * 7];
-}
-
-/**
- * Writes an 8-word big integer to device memory
- */
-__device__ static void writeInt(unsigned int *ara, int idx, const unsigned int x[8])
-{
- int totalThreads = gridDim.x * blockDim.x;
-
- int base = idx * totalThreads * 8;
-
- int threadId = blockDim.x * blockIdx.x + threadIdx.x;
-
- int index = base + threadId;
-
- for (int i = 0; i < 8; i++) {
- ara[index] = x[i];
- index += totalThreads;
- }
-}
-
-/**
- * Subtraction mod p
- */
-__device__ static void subModP(const unsigned int a[8], const unsigned int b[8], unsigned int c[8])
-{
- sub_cc(c[7], a[7], b[7]);
- subc_cc(c[6], a[6], b[6]);
- subc_cc(c[5], a[5], b[5]);
- subc_cc(c[4], a[4], b[4]);
- subc_cc(c[3], a[3], b[3]);
- subc_cc(c[2], a[2], b[2]);
- subc_cc(c[1], a[1], b[1]);
- subc_cc(c[0], a[0], b[0]);
-
- unsigned int borrow = 0;
- subc(borrow, 0, 0);
-
- if (borrow) {
- add_cc(c[7], c[7], _P[7]);
- addc_cc(c[6], c[6], _P[6]);
- addc_cc(c[5], c[5], _P[5]);
- addc_cc(c[4], c[4], _P[4]);
- addc_cc(c[3], c[3], _P[3]);
- addc_cc(c[2], c[2], _P[2]);
- addc_cc(c[1], c[1], _P[1]);
- addc(c[0], c[0], _P[0]);
- }
-}
-
-__device__ static unsigned int add(const unsigned int a[8], const unsigned int b[8], unsigned int c[8])
-{
- add_cc(c[7], a[7], b[7]);
- addc_cc(c[6], a[6], b[6]);
- addc_cc(c[5], a[5], b[5]);
- addc_cc(c[4], a[4], b[4]);
- addc_cc(c[3], a[3], b[3]);
- addc_cc(c[2], a[2], b[2]);
- addc_cc(c[1], a[1], b[1]);
- addc_cc(c[0], a[0], b[0]);
-
- unsigned int carry = 0;
- addc(carry, 0, 0);
-
- return carry;
-}
-
-__device__ static unsigned int sub(const unsigned int a[8], const unsigned int b[8], unsigned int c[8])
-{
- sub_cc(c[7], a[7], b[7]);
- subc_cc(c[6], a[6], b[6]);
- subc_cc(c[5], a[5], b[5]);
- subc_cc(c[4], a[4], b[4]);
- subc_cc(c[3], a[3], b[3]);
- subc_cc(c[2], a[2], b[2]);
- subc_cc(c[1], a[1], b[1]);
- subc_cc(c[0], a[0], b[0]);
-
- unsigned int borrow = 0;
- subc(borrow, 0, 0);
-
- return (borrow & 0x01);
-}
-
-
-__device__ static void addModP(const unsigned int a[8], const unsigned int b[8], unsigned int c[8])
-{
- add_cc(c[7], a[7], b[7]);
- addc_cc(c[6], a[6], b[6]);
- addc_cc(c[5], a[5], b[5]);
- addc_cc(c[4], a[4], b[4]);
- addc_cc(c[3], a[3], b[3]);
- addc_cc(c[2], a[2], b[2]);
- addc_cc(c[1], a[1], b[1]);
- addc_cc(c[0], a[0], b[0]);
-
- unsigned int carry = 0;
- addc(carry, 0, 0);
-
- bool gt = false;
- for(int i = 0; i < 8; i++) {
- if(c[i] > _P[i]) {
- gt = true;
- break;
- } else if(c[i] < _P[i]) {
- break;
- }
- }
-
- if(carry || gt) {
- sub_cc(c[7], c[7], _P[7]);
- subc_cc(c[6], c[6], _P[6]);
- subc_cc(c[5], c[5], _P[5]);
- subc_cc(c[4], c[4], _P[4]);
- subc_cc(c[3], c[3], _P[3]);
- subc_cc(c[2], c[2], _P[2]);
- subc_cc(c[1], c[1], _P[1]);
- subc(c[0], c[0], _P[0]);
- }
-}
-
-
-
-__device__ static void mulModP(const unsigned int a[8], const unsigned int b[8], unsigned int c[8])
-{
- unsigned int high[8] = { 0 };
-
- unsigned int t = a[7];
-
- // a[7] * b (low)
- for(int i = 7; i >= 0; i--) {
- c[i] = t * b[i];
- }
-
- // a[7] * b (high)
- mad_hi_cc(c[6], t, b[7], c[6]);
- madc_hi_cc(c[5], t, b[6], c[5]);
- madc_hi_cc(c[4], t, b[5], c[4]);
- madc_hi_cc(c[3], t, b[4], c[3]);
- madc_hi_cc(c[2], t, b[3], c[2]);
- madc_hi_cc(c[1], t, b[2], c[1]);
- madc_hi_cc(c[0], t, b[1], c[0]);
- madc_hi(high[7], t, b[0], high[7]);
-
-
-
- // a[6] * b (low)
- t = a[6];
- mad_lo_cc(c[6], t, b[7], c[6]);
- madc_lo_cc(c[5], t, b[6], c[5]);
- madc_lo_cc(c[4], t, b[5], c[4]);
- madc_lo_cc(c[3], t, b[4], c[3]);
- madc_lo_cc(c[2], t, b[3], c[2]);
- madc_lo_cc(c[1], t, b[2], c[1]);
- madc_lo_cc(c[0], t, b[1], c[0]);
- madc_lo_cc(high[7], t, b[0], high[7]);
- addc(high[6], high[6], 0);
-
- // a[6] * b (high)
- mad_hi_cc(c[5], t, b[7], c[5]);
- madc_hi_cc(c[4], t, b[6], c[4]);
- madc_hi_cc(c[3], t, b[5], c[3]);
- madc_hi_cc(c[2], t, b[4], c[2]);
- madc_hi_cc(c[1], t, b[3], c[1]);
- madc_hi_cc(c[0], t, b[2], c[0]);
- madc_hi_cc(high[7], t, b[1], high[7]);
- madc_hi(high[6], t, b[0], high[6]);
-
- // a[5] * b (low)
- t = a[5];
- mad_lo_cc(c[5], t, b[7], c[5]);
- madc_lo_cc(c[4], t, b[6], c[4]);
- madc_lo_cc(c[3], t, b[5], c[3]);
- madc_lo_cc(c[2], t, b[4], c[2]);
- madc_lo_cc(c[1], t, b[3], c[1]);
- madc_lo_cc(c[0], t, b[2], c[0]);
- madc_lo_cc(high[7], t, b[1], high[7]);
- madc_lo_cc(high[6], t, b[0], high[6]);
- addc(high[5], high[5], 0);
-
- // a[5] * b (high)
- mad_hi_cc(c[4], t, b[7], c[4]);
- madc_hi_cc(c[3], t, b[6], c[3]);
- madc_hi_cc(c[2], t, b[5], c[2]);
- madc_hi_cc(c[1], t, b[4], c[1]);
- madc_hi_cc(c[0], t, b[3], c[0]);
- madc_hi_cc(high[7], t, b[2], high[7]);
- madc_hi_cc(high[6], t, b[1], high[6]);
- madc_hi(high[5], t, b[0], high[5]);
-
-
-
- // a[4] * b (low)
- t = a[4];
- mad_lo_cc(c[4], t, b[7], c[4]);
- madc_lo_cc(c[3], t, b[6], c[3]);
- madc_lo_cc(c[2], t, b[5], c[2]);
- madc_lo_cc(c[1], t, b[4], c[1]);
- madc_lo_cc(c[0], t, b[3], c[0]);
- madc_lo_cc(high[7], t, b[2], high[7]);
- madc_lo_cc(high[6], t, b[1], high[6]);
- madc_lo_cc(high[5], t, b[0], high[5]);
- addc(high[4], high[4], 0);
-
- // a[4] * b (high)
- mad_hi_cc(c[3], t, b[7], c[3]);
- madc_hi_cc(c[2], t, b[6], c[2]);
- madc_hi_cc(c[1], t, b[5], c[1]);
- madc_hi_cc(c[0], t, b[4], c[0]);
- madc_hi_cc(high[7], t, b[3], high[7]);
- madc_hi_cc(high[6], t, b[2], high[6]);
- madc_hi_cc(high[5], t, b[1], high[5]);
- madc_hi(high[4], t, b[0], high[4]);
-
-
-
- // a[3] * b (low)
- t = a[3];
- mad_lo_cc(c[3], t, b[7], c[3]);
- madc_lo_cc(c[2], t, b[6], c[2]);
- madc_lo_cc(c[1], t, b[5], c[1]);
- madc_lo_cc(c[0], t, b[4], c[0]);
- madc_lo_cc(high[7], t, b[3], high[7]);
- madc_lo_cc(high[6], t, b[2], high[6]);
- madc_lo_cc(high[5], t, b[1], high[5]);
- madc_lo_cc(high[4], t, b[0], high[4]);
- addc(high[3], high[3], 0);
-
- // a[3] * b (high)
- mad_hi_cc(c[2], t, b[7], c[2]);
- madc_hi_cc(c[1], t, b[6], c[1]);
- madc_hi_cc(c[0], t, b[5], c[0]);
- madc_hi_cc(high[7], t, b[4], high[7]);
- madc_hi_cc(high[6], t, b[3], high[6]);
- madc_hi_cc(high[5], t, b[2], high[5]);
- madc_hi_cc(high[4], t, b[1], high[4]);
- madc_hi(high[3], t, b[0], high[3]);
-
-
-
- // a[2] * b (low)
- t = a[2];
- mad_lo_cc(c[2], t, b[7], c[2]);
- madc_lo_cc(c[1], t, b[6], c[1]);
- madc_lo_cc(c[0], t, b[5], c[0]);
- madc_lo_cc(high[7], t, b[4], high[7]);
- madc_lo_cc(high[6], t, b[3], high[6]);
- madc_lo_cc(high[5], t, b[2], high[5]);
- madc_lo_cc(high[4], t, b[1], high[4]);
- madc_lo_cc(high[3], t, b[0], high[3]);
- addc(high[2], high[2], 0);
-
- // a[2] * b (high)
- mad_hi_cc(c[1], t, b[7], c[1]);
- madc_hi_cc(c[0], t, b[6], c[0]);
- madc_hi_cc(high[7], t, b[5], high[7]);
- madc_hi_cc(high[6], t, b[4], high[6]);
- madc_hi_cc(high[5], t, b[3], high[5]);
- madc_hi_cc(high[4], t, b[2], high[4]);
- madc_hi_cc(high[3], t, b[1], high[3]);
- madc_hi(high[2], t, b[0], high[2]);
-
-
-
- // a[1] * b (low)
- t = a[1];
- mad_lo_cc(c[1], t, b[7], c[1]);
- madc_lo_cc(c[0], t, b[6], c[0]);
- madc_lo_cc(high[7], t, b[5], high[7]);
- madc_lo_cc(high[6], t, b[4], high[6]);
- madc_lo_cc(high[5], t, b[3], high[5]);
- madc_lo_cc(high[4], t, b[2], high[4]);
- madc_lo_cc(high[3], t, b[1], high[3]);
- madc_lo_cc(high[2], t, b[0], high[2]);
- addc(high[1], high[1], 0);
-
- // a[1] * b (high)
- mad_hi_cc(c[0], t, b[7], c[0]);
- madc_hi_cc(high[7], t, b[6], high[7]);
- madc_hi_cc(high[6], t, b[5], high[6]);
- madc_hi_cc(high[5], t, b[4], high[5]);
- madc_hi_cc(high[4], t, b[3], high[4]);
- madc_hi_cc(high[3], t, b[2], high[3]);
- madc_hi_cc(high[2], t, b[1], high[2]);
- madc_hi(high[1], t, b[0], high[1]);
-
-
-
- // a[0] * b (low)
- t = a[0];
- mad_lo_cc(c[0], t, b[7], c[0]);
- madc_lo_cc(high[7], t, b[6], high[7]);
- madc_lo_cc(high[6], t, b[5], high[6]);
- madc_lo_cc(high[5], t, b[4], high[5]);
- madc_lo_cc(high[4], t, b[3], high[4]);
- madc_lo_cc(high[3], t, b[2], high[3]);
- madc_lo_cc(high[2], t, b[1], high[2]);
- madc_lo_cc(high[1], t, b[0], high[1]);
- addc(high[0], high[0], 0);
-
- // a[0] * b (high)
- mad_hi_cc(high[7], t, b[7], high[7]);
- madc_hi_cc(high[6], t, b[6], high[6]);
- madc_hi_cc(high[5], t, b[5], high[5]);
- madc_hi_cc(high[4], t, b[4], high[4]);
- madc_hi_cc(high[3], t, b[3], high[3]);
- madc_hi_cc(high[2], t, b[2], high[2]);
- madc_hi_cc(high[1], t, b[1], high[1]);
- madc_hi(high[0], t, b[0], high[0]);
-
-
-
- // At this point we have 16 32-bit words representing a 512-bit value
- // high[0 ... 7] and c[0 ... 7]
- const unsigned int s = 977;
-
- // Store high[6] and high[7] since they will be overwritten
- unsigned int high7 = high[7];
- unsigned int high6 = high[6];
-
-
- // Take high 256 bits, multiply by 2^32, add to low 256 bits
- // That is, take high[0 ... 7], shift it left 1 word and add it to c[0 ... 7]
- add_cc(c[6], high[7], c[6]);
- addc_cc(c[5], high[6], c[5]);
- addc_cc(c[4], high[5], c[4]);
- addc_cc(c[3], high[4], c[3]);
- addc_cc(c[2], high[3], c[2]);
- addc_cc(c[1], high[2], c[1]);
- addc_cc(c[0], high[1], c[0]);
- addc_cc(high[7], high[0], 0);
- addc(high[6], 0, 0);
-
-
- // Take high 256 bits, multiply by 977, add to low 256 bits
- // That is, take high[0 ... 5], high6, high7, multiply by 977 and add to c[0 ... 7]
- mad_lo_cc(c[7], high7, s, c[7]);
- madc_lo_cc(c[6], high6, s, c[6]);
- madc_lo_cc(c[5], high[5], s, c[5]);
- madc_lo_cc(c[4], high[4], s, c[4]);
- madc_lo_cc(c[3], high[3], s, c[3]);
- madc_lo_cc(c[2], high[2], s, c[2]);
- madc_lo_cc(c[1], high[1], s, c[1]);
- madc_lo_cc(c[0], high[0], s, c[0]);
- addc_cc(high[7], high[7], 0);
- addc(high[6], high[6], 0);
-
-
- mad_hi_cc(c[6], high7, s, c[6]);
- madc_hi_cc(c[5], high6, s, c[5]);
- madc_hi_cc(c[4], high[5], s, c[4]);
- madc_hi_cc(c[3], high[4], s, c[3]);
- madc_hi_cc(c[2], high[3], s, c[2]);
- madc_hi_cc(c[1], high[2], s, c[1]);
- madc_hi_cc(c[0], high[1], s, c[0]);
- madc_hi_cc(high[7], high[0], s, high[7]);
- addc(high[6], high[6], 0);
-
-
- // Repeat the same steps, but this time we only need to handle high[6] and high[7]
- high7 = high[7];
- high6 = high[6];
-
- // Take the high 64 bits, multiply by 2^32 and add to the low 256 bits
- add_cc(c[6], high[7], c[6]);
- addc_cc(c[5], high[6], c[5]);
- addc_cc(c[4], c[4], 0);
- addc_cc(c[3], c[3], 0);
- addc_cc(c[2], c[2], 0);
- addc_cc(c[1], c[1], 0);
- addc_cc(c[0], c[0], 0);
- addc(high[7], 0, 0);
-
-
- // Take the high 64 bits, multiply by 977 and add to the low 256 bits
- mad_lo_cc(c[7], high7, s, c[7]);
- madc_lo_cc(c[6], high6, s, c[6]);
- addc_cc(c[5], c[5], 0);
- addc_cc(c[4], c[4], 0);
- addc_cc(c[3], c[3], 0);
- addc_cc(c[2], c[2], 0);
- addc_cc(c[1], c[1], 0);
- addc_cc(c[0], c[0], 0);
- addc(high[7], high[7], 0);
-
- mad_hi_cc(c[6], high7, s, c[6]);
- madc_hi_cc(c[5], high6, s, c[5]);
- addc_cc(c[4], c[4], 0);
- addc_cc(c[3], c[3], 0);
- addc_cc(c[2], c[2], 0);
- addc_cc(c[1], c[1], 0);
- addc_cc(c[0], c[0], 0);
- addc(high[7], high[7], 0);
-
-
- bool overflow = high[7] != 0;
-
- unsigned int borrow = sub(c, _P, c);
-
- if(overflow) {
- if(!borrow) {
- sub(c, _P, c);
- }
- } else {
- if(borrow) {
- add(c, _P, c);
- }
- }
-}
-
-
-/**
- * Square mod P
- * b = a * a
- */
-__device__ static void squareModP(const unsigned int a[8], unsigned int b[8])
-{
- mulModP(a, a, b);
-}
-
-/**
- * Square mod P
- * x = x * x
- */
-__device__ static void squareModP(unsigned int x[8])
-{
- unsigned int tmp[8];
- squareModP(x, tmp);
- copyBigInt(tmp, x);
-}
-
-/**
- * Multiply mod P
- * c = a * c
- */
-__device__ static void mulModP(const unsigned int a[8], unsigned int c[8])
-{
- unsigned int tmp[8];
- mulModP(a, c, tmp);
-
- copyBigInt(tmp, c);
-}
-
-/**
- * Multiplicative inverse mod P using Fermat's method of x^(p-2) mod p and addition chains
- */
-__device__ static void invModP(unsigned int value[8])
-{
- unsigned int x[8];
-
- copyBigInt(value, x);
-
- unsigned int y[8] = { 0, 0, 0, 0, 0, 0, 0, 1 };
-
- // 0xd - 1101
- mulModP(x, y);
- squareModP(x);
- //mulModP(x, y);
- squareModP(x);
- mulModP(x, y);
- squareModP(x);
- mulModP(x, y);
- squareModP(x);
-
-
- // 0x2 - 0010
- //mulModP(x, y);
- squareModP(x);
- mulModP(x, y);
- squareModP(x);
- //mulModP(x, y);
- squareModP(x);
- //mulModP(x, y);
- squareModP(x);
-
- // 0xc = 0x1100
- //mulModP(x, y);
- squareModP(x);
- //mulModP(x, y);
- squareModP(x);
- mulModP(x, y);
- squareModP(x);
- mulModP(x, y);
- squareModP(x);
-
- // 0xfffff
- for(int i = 0; i < 20; i++) {
- mulModP(x, y);
- squareModP(x);
- }
-
- // 0xe - 1110
- //mulModP(x, y);
- squareModP(x);
- mulModP(x, y);
- squareModP(x);
- mulModP(x, y);
- squareModP(x);
- mulModP(x, y);
- squareModP(x);
-
- // 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffff
- for(int i = 0; i < 219; i++) {
- mulModP(x, y);
- squareModP(x);
- }
- mulModP(x, y);
-
- copyBigInt(y, value);
-}
-
-__device__ static void invModP(const unsigned int *value, unsigned int *inverse)
-{
- copyBigInt(value, inverse);
-
- invModP(inverse);
-}
-
-__device__ static void negModP(const unsigned int *value, unsigned int *negative)
-{
- sub_cc(negative[0], _P[0], value[0]);
- subc_cc(negative[1], _P[1], value[1]);
- subc_cc(negative[2], _P[2], value[2]);
- subc_cc(negative[3], _P[3], value[3]);
- subc_cc(negative[4], _P[4], value[4]);
- subc_cc(negative[5], _P[5], value[5]);
- subc_cc(negative[6], _P[6], value[6]);
- subc(negative[7], _P[7], value[7]);
-}
-
-
-__device__ __forceinline__ static void beginBatchAdd(const unsigned int *px, const unsigned int *x, unsigned int *chain, int i, int batchIdx, unsigned int inverse[8])
-{
- // x = Gx - x
- unsigned int t[8];
- subModP(px, x, t);
-
- // Keep a chain of multiples of the diff, i.e. c[0] = diff0, c[1] = diff0 * diff1,
- // c[2] = diff2 * diff1 * diff0, etc
- mulModP(t, inverse);
-
- writeInt(chain, batchIdx, inverse);
-}
-
-
-__device__ __forceinline__ static void beginBatchAddWithDouble(const unsigned int *px, const unsigned int *py, unsigned int *xPtr, unsigned int *chain, int i, int batchIdx, unsigned int inverse[8])
-{
- unsigned int x[8];
- readInt(xPtr, i, x);
-
- if(equal(px, x)) {
- addModP(py, py, x);
- } else {
- // x = Gx - x
- subModP(px, x, x);
- }
-
- // Keep a chain of multiples of the diff, i.e. c[0] = diff0, c[1] = diff0 * diff1,
- // c[2] = diff2 * diff1 * diff0, etc
- mulModP(x, inverse);
-
- writeInt(chain, batchIdx, inverse);
-}
-
-__device__ static void completeBatchAddWithDouble(const unsigned int *px, const unsigned int *py, const unsigned int *xPtr, const unsigned int *yPtr, int i, int batchIdx, unsigned int *chain, unsigned int *inverse, unsigned int newX[8], unsigned int newY[8])
-{
- unsigned int s[8];
- unsigned int x[8];
- unsigned int y[8];
-
- readInt(xPtr, i, x);
- readInt(yPtr, i, y);
-
- if(batchIdx >= 1) {
- unsigned int c[8];
-
- readInt(chain, batchIdx - 1, c);
-
- mulModP(inverse, c, s);
-
- unsigned int diff[8];
- if(equal(px, x)) {
- addModP(py, py, diff);
- } else {
- subModP(px, x, diff);
- }
-
- mulModP(diff, inverse);
- } else {
- copyBigInt(inverse, s);
- }
-
-
- if(equal(px, x)) {
- // currently s = 1 / 2y
-
- unsigned int x2[8];
- unsigned int tx2[8];
-
- // 3x^2
- mulModP(x, x, x2);
- addModP(x2, x2, tx2);
- addModP(x2, tx2, tx2);
-
-
- // s = 3x^2 * 1/2y
- mulModP(tx2, s);
-
- // s^2
- unsigned int s2[8];
- mulModP(s, s, s2);
-
- // Rx = s^2 - 2px
- subModP(s2, x, newX);
- subModP(newX, x, newX);
-
- // Ry = s(px - rx) - py
- unsigned int k[8];
- subModP(px, newX, k);
- mulModP(s, k, newY);
- subModP(newY, py, newY);
-
- } else {
-
- unsigned int rise[8];
- subModP(py, y, rise);
-
- mulModP(rise, s);
-
- // Rx = s^2 - Gx - Qx
- unsigned int s2[8];
- mulModP(s, s, s2);
-
- subModP(s2, px, newX);
- subModP(newX, x, newX);
-
- // Ry = s(px - rx) - py
- unsigned int k[8];
- subModP(px, newX, k);
- mulModP(s, k, newY);
- subModP(newY, py, newY);
- }
-}
-
-__device__ static void completeBatchAdd(const unsigned int *px, const unsigned int *py, unsigned int *xPtr, unsigned int *yPtr, int i, int batchIdx, unsigned int *chain, unsigned int *inverse, unsigned int newX[8], unsigned int newY[8])
-{
- unsigned int s[8];
- unsigned int x[8];
-
- readInt(xPtr, i, x);
-
- if(batchIdx >= 1) {
- unsigned int c[8];
-
- readInt(chain, batchIdx - 1, c);
- mulModP(inverse, c, s);
-
- unsigned int diff[8];
- subModP(px, x, diff);
- mulModP(diff, inverse);
- } else {
- copyBigInt(inverse, s);
- }
-
- unsigned int y[8];
- readInt(yPtr, i, y);
-
- unsigned int rise[8];
- subModP(py, y, rise);
-
- mulModP(rise, s);
-
- // Rx = s^2 - Gx - Qx
- unsigned int s2[8];
- mulModP(s, s, s2);
- subModP(s2, px, newX);
- subModP(newX, x, newX);
-
- // Ry = s(px - rx) - py
- unsigned int k[8];
- subModP(px, newX, k);
- mulModP(s, k, newY);
- subModP(newY, py, newY);
-}
-
-
-__device__ __forceinline__ static void doBatchInverse(unsigned int inverse[8])
-{
- invModP(inverse);
-}
-
-#endif
\ No newline at end of file
diff --git a/cudaMath/sha256.cuh b/cudaMath/sha256.cuh
deleted file mode 100644
index b04aa9a..0000000
--- a/cudaMath/sha256.cuh
+++ /dev/null
@@ -1,545 +0,0 @@
-#ifndef _SHA256_CUH
-#define _SHA256_CUH
-
-#include
-#include
-
-#include
-
-
-__constant__ unsigned int _K[64] = {
- 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
- 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
- 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
- 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
- 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
- 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
- 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
- 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
-};
-
-__constant__ unsigned int _IV[8] = {
- 0x6a09e667,
- 0xbb67ae85,
- 0x3c6ef372,
- 0xa54ff53a,
- 0x510e527f,
- 0x9b05688c,
- 0x1f83d9ab,
- 0x5be0cd19
-};
-
-
-__device__ __forceinline__ unsigned int rotr(unsigned int x, int n)
-{
- return (x >> n) ^ (x << (32 - n));
-}
-
-__device__ __forceinline__ unsigned int MAJ(unsigned int a, unsigned int b, unsigned int c)
-{
- return (a & b) ^ (a & c) ^ (b & c);
-}
-
-__device__ __forceinline__ unsigned int CH(unsigned int e, unsigned int f, unsigned int g)
-{
- return (e & f) ^ (~e & g);
-}
-
-__device__ __forceinline__ unsigned int s0(unsigned int x)
-{
- return rotr(x, 7) ^ rotr(x, 18) ^ (x >> 3);
-}
-
-__device__ __forceinline__ unsigned int s1(unsigned int x)
-{
- return rotr(x, 17) ^ rotr(x, 19) ^ (x >> 10);
-}
-
-
-__device__ __forceinline__ void round(unsigned int a, unsigned int b, unsigned int c, unsigned int &d, unsigned e, unsigned int f, unsigned int g, unsigned int &h, unsigned int m, unsigned int k)
-{
- unsigned int s = CH(e, f, g) + (rotr(e, 6) ^ rotr(e, 11) ^ rotr(e, 25)) + k + m;
-
- d += s + h;
-
- h += s + MAJ(a, b, c) + (rotr(a, 2) ^ rotr(a, 13) ^ rotr(a, 22));
-}
-
-__device__ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8], unsigned int digest[8])
-{
- unsigned int a, b, c, d, e, f, g, h;
- unsigned int w[16];
-
- // 0x04 || x || y
- w[0] = (x[0] >> 8) | 0x04000000;
- w[1] = (x[1] >> 8) | (x[0] << 24);
- w[2] = (x[2] >> 8) | (x[1] << 24);
- w[3] = (x[3] >> 8) | (x[2] << 24);
- w[4] = (x[4] >> 8) | (x[3] << 24);
- w[5] = (x[5] >> 8) | (x[4] << 24);
- w[6] = (x[6] >> 8) | (x[5] << 24);
- w[7] = (x[7] >> 8) | (x[6] << 24);
- w[8] = (y[0] >> 8) | (x[7] << 24);
- w[9] = (y[1] >> 8) | (y[0] << 24);
- w[10] = (y[2] >> 8) | (y[1] << 24);
- w[11] = (y[3] >> 8) | (y[2] << 24);
- w[12] = (y[4] >> 8) | (y[3] << 24);
- w[13] = (y[5] >> 8) | (y[4] << 24);
- w[14] = (y[6] >> 8) | (y[5] << 24);
- w[15] = (y[7] >> 8) | (y[6] << 24);
-
- a = _IV[0];
- b = _IV[1];
- c = _IV[2];
- d = _IV[3];
- e = _IV[4];
- f = _IV[5];
- g = _IV[6];
- h = _IV[7];
-
-
- round(a, b, c, d, e, f, g, h, w[0], _K[0]);
- round(h, a, b, c, d, e, f, g, w[1], _K[1]);
- round(g, h, a, b, c, d, e, f, w[2], _K[2]);
- round(f, g, h, a, b, c, d, e, w[3], _K[3]);
- round(e, f, g, h, a, b, c, d, w[4], _K[4]);
- round(d, e, f, g, h, a, b, c, w[5], _K[5]);
- round(c, d, e, f, g, h, a, b, w[6], _K[6]);
- round(b, c, d, e, f, g, h, a, w[7], _K[7]);
- round(a, b, c, d, e, f, g, h, w[8], _K[8]);
- round(h, a, b, c, d, e, f, g, w[9], _K[9]);
- round(g, h, a, b, c, d, e, f, w[10], _K[10]);
- round(f, g, h, a, b, c, d, e, w[11], _K[11]);
- round(e, f, g, h, a, b, c, d, w[12], _K[12]);
- round(d, e, f, g, h, a, b, c, w[13], _K[13]);
- round(c, d, e, f, g, h, a, b, w[14], _K[14]);
- round(b, c, d, e, f, g, h, a, w[15], _K[15]);
-
-
-
- w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
- w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]);
- w[2] = w[2] + s0(w[3]) + w[11] + s1(w[0]);
- w[3] = w[3] + s0(w[4]) + w[12] + s1(w[1]);
- w[4] = w[4] + s0(w[5]) + w[13] + s1(w[2]);
- w[5] = w[5] + s0(w[6]) + w[14] + s1(w[3]);
- w[6] = w[6] + s0(w[7]) + w[15] + s1(w[4]);
- w[7] = w[7] + s0(w[8]) + w[0] + s1(w[5]);
- w[8] = w[8] + s0(w[9]) + w[1] + s1(w[6]);
- w[9] = w[9] + s0(w[10]) + w[2] + s1(w[7]);
- w[10] = w[10] + s0(w[11]) + w[3] + s1(w[8]);
- w[11] = w[11] + s0(w[12]) + w[4] + s1(w[9]);
- w[12] = w[12] + s0(w[13]) + w[5] + s1(w[10]);
- w[13] = w[13] + s0(w[14]) + w[6] + s1(w[11]);
- w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
- w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
-
- round(a, b, c, d, e, f, g, h, w[0], _K[16]);
- round(h, a, b, c, d, e, f, g, w[1], _K[17]);
- round(g, h, a, b, c, d, e, f, w[2], _K[18]);
- round(f, g, h, a, b, c, d, e, w[3], _K[19]);
- round(e, f, g, h, a, b, c, d, w[4], _K[20]);
- round(d, e, f, g, h, a, b, c, w[5], _K[21]);
- round(c, d, e, f, g, h, a, b, w[6], _K[22]);
- round(b, c, d, e, f, g, h, a, w[7], _K[23]);
- round(a, b, c, d, e, f, g, h, w[8], _K[24]);
- round(h, a, b, c, d, e, f, g, w[9], _K[25]);
- round(g, h, a, b, c, d, e, f, w[10], _K[26]);
- round(f, g, h, a, b, c, d, e, w[11], _K[27]);
- round(e, f, g, h, a, b, c, d, w[12], _K[28]);
- round(d, e, f, g, h, a, b, c, w[13], _K[29]);
- round(c, d, e, f, g, h, a, b, w[14], _K[30]);
- round(b, c, d, e, f, g, h, a, w[15], _K[31]);
-
-
- w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
- w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]);
- w[2] = w[2] + s0(w[3]) + w[11] + s1(w[0]);
- w[3] = w[3] + s0(w[4]) + w[12] + s1(w[1]);
- w[4] = w[4] + s0(w[5]) + w[13] + s1(w[2]);
- w[5] = w[5] + s0(w[6]) + w[14] + s1(w[3]);
- w[6] = w[6] + s0(w[7]) + w[15] + s1(w[4]);
- w[7] = w[7] + s0(w[8]) + w[0] + s1(w[5]);
- w[8] = w[8] + s0(w[9]) + w[1] + s1(w[6]);
- w[9] = w[9] + s0(w[10]) + w[2] + s1(w[7]);
- w[10] = w[10] + s0(w[11]) + w[3] + s1(w[8]);
- w[11] = w[11] + s0(w[12]) + w[4] + s1(w[9]);
- w[12] = w[12] + s0(w[13]) + w[5] + s1(w[10]);
- w[13] = w[13] + s0(w[14]) + w[6] + s1(w[11]);
- w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
- w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
-
- round(a, b, c, d, e, f, g, h, w[0], _K[32]);
- round(h, a, b, c, d, e, f, g, w[1], _K[33]);
- round(g, h, a, b, c, d, e, f, w[2], _K[34]);
- round(f, g, h, a, b, c, d, e, w[3], _K[35]);
- round(e, f, g, h, a, b, c, d, w[4], _K[36]);
- round(d, e, f, g, h, a, b, c, w[5], _K[37]);
- round(c, d, e, f, g, h, a, b, w[6], _K[38]);
- round(b, c, d, e, f, g, h, a, w[7], _K[39]);
- round(a, b, c, d, e, f, g, h, w[8], _K[40]);
- round(h, a, b, c, d, e, f, g, w[9], _K[41]);
- round(g, h, a, b, c, d, e, f, w[10], _K[42]);
- round(f, g, h, a, b, c, d, e, w[11], _K[43]);
- round(e, f, g, h, a, b, c, d, w[12], _K[44]);
- round(d, e, f, g, h, a, b, c, w[13], _K[45]);
- round(c, d, e, f, g, h, a, b, w[14], _K[46]);
- round(b, c, d, e, f, g, h, a, w[15], _K[47]);
-
-
-
-
- w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
- w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]);
- w[2] = w[2] + s0(w[3]) + w[11] + s1(w[0]);
- w[3] = w[3] + s0(w[4]) + w[12] + s1(w[1]);
- w[4] = w[4] + s0(w[5]) + w[13] + s1(w[2]);
- w[5] = w[5] + s0(w[6]) + w[14] + s1(w[3]);
- w[6] = w[6] + s0(w[7]) + w[15] + s1(w[4]);
- w[7] = w[7] + s0(w[8]) + w[0] + s1(w[5]);
- w[8] = w[8] + s0(w[9]) + w[1] + s1(w[6]);
- w[9] = w[9] + s0(w[10]) + w[2] + s1(w[7]);
- w[10] = w[10] + s0(w[11]) + w[3] + s1(w[8]);
- w[11] = w[11] + s0(w[12]) + w[4] + s1(w[9]);
- w[12] = w[12] + s0(w[13]) + w[5] + s1(w[10]);
- w[13] = w[13] + s0(w[14]) + w[6] + s1(w[11]);
- w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
- w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
-
- round(a, b, c, d, e, f, g, h, w[0], _K[48]);
- round(h, a, b, c, d, e, f, g, w[1], _K[49]);
- round(g, h, a, b, c, d, e, f, w[2], _K[50]);
- round(f, g, h, a, b, c, d, e, w[3], _K[51]);
- round(e, f, g, h, a, b, c, d, w[4], _K[52]);
- round(d, e, f, g, h, a, b, c, w[5], _K[53]);
- round(c, d, e, f, g, h, a, b, w[6], _K[54]);
- round(b, c, d, e, f, g, h, a, w[7], _K[55]);
- round(a, b, c, d, e, f, g, h, w[8], _K[56]);
- round(h, a, b, c, d, e, f, g, w[9], _K[57]);
- round(g, h, a, b, c, d, e, f, w[10], _K[58]);
- round(f, g, h, a, b, c, d, e, w[11], _K[59]);
- round(e, f, g, h, a, b, c, d, w[12], _K[60]);
- round(d, e, f, g, h, a, b, c, w[13], _K[61]);
- round(c, d, e, f, g, h, a, b, w[14], _K[62]);
- round(b, c, d, e, f, g, h, a, w[15], _K[63]);
-
- a += _IV[0];
- b += _IV[1];
- c += _IV[2];
- d += _IV[3];
- e += _IV[4];
- f += _IV[5];
- g += _IV[6];
- h += _IV[7];
-
- // store the intermediate hash value
- unsigned int tmp[8];
- tmp[0] = a;
- tmp[1] = b;
- tmp[2] = c;
- tmp[3] = d;
- tmp[4] = e;
- tmp[5] = f;
- tmp[6] = g;
- tmp[7] = h;
-
- w[0] = (y[7] << 24) | 0x00800000;
- w[15] = 65 * 8;
-
- round(a, b, c, d, e, f, g, h, w[0], _K[0]);
- round(h, a, b, c, d, e, f, g, 0, _K[1]);
- round(g, h, a, b, c, d, e, f, 0, _K[2]);
- round(f, g, h, a, b, c, d, e, 0, _K[3]);
- round(e, f, g, h, a, b, c, d, 0, _K[4]);
- round(d, e, f, g, h, a, b, c, 0, _K[5]);
- round(c, d, e, f, g, h, a, b, 0, _K[6]);
- round(b, c, d, e, f, g, h, a, 0, _K[7]);
- round(a, b, c, d, e, f, g, h, 0, _K[8]);
- round(h, a, b, c, d, e, f, g, 0, _K[9]);
- round(g, h, a, b, c, d, e, f, 0, _K[10]);
- round(f, g, h, a, b, c, d, e, 0, _K[11]);
- round(e, f, g, h, a, b, c, d, 0, _K[12]);
- round(d, e, f, g, h, a, b, c, 0, _K[13]);
- round(c, d, e, f, g, h, a, b, 0, _K[14]);
- round(b, c, d, e, f, g, h, a, w[15], _K[15]);
-
- w[0] = w[0] + s0(0) + 0 + s1(0);
- w[1] = 0 + s0(0) + 0 + s1(w[15]);
- w[2] = 0 + s0(0) + 0 + s1(w[0]);
- w[3] = 0 + s0(0) + 0 + s1(w[1]);
- w[4] = 0 + s0(0) + 0 + s1(w[2]);
- w[5] = 0 + s0(0) + 0 + s1(w[3]);
- w[6] = 0 + s0(0) + w[15] + s1(w[4]);
- w[7] = 0 + s0(0) + w[0] + s1(w[5]);
- w[8] = 0 + s0(0) + w[1] + s1(w[6]);
- w[9] = 0 + s0(0) + w[2] + s1(w[7]);
- w[10] = 0 + s0(0) + w[3] + s1(w[8]);
- w[11] = 0 + s0(0) + w[4] + s1(w[9]);
- w[12] = 0 + s0(0) + w[5] + s1(w[10]);
- w[13] = 0 + s0(0) + w[6] + s1(w[11]);
- w[14] = 0 + s0(w[15]) + w[7] + s1(w[12]);
- w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
-
-
-
- round(a, b, c, d, e, f, g, h, w[0], _K[16]);
- round(h, a, b, c, d, e, f, g, w[1], _K[17]);
- round(g, h, a, b, c, d, e, f, w[2], _K[18]);
- round(f, g, h, a, b, c, d, e, w[3], _K[19]);
- round(e, f, g, h, a, b, c, d, w[4], _K[20]);
- round(d, e, f, g, h, a, b, c, w[5], _K[21]);
- round(c, d, e, f, g, h, a, b, w[6], _K[22]);
- round(b, c, d, e, f, g, h, a, w[7], _K[23]);
- round(a, b, c, d, e, f, g, h, w[8], _K[24]);
- round(h, a, b, c, d, e, f, g, w[9], _K[25]);
- round(g, h, a, b, c, d, e, f, w[10], _K[26]);
- round(f, g, h, a, b, c, d, e, w[11], _K[27]);
- round(e, f, g, h, a, b, c, d, w[12], _K[28]);
- round(d, e, f, g, h, a, b, c, w[13], _K[29]);
- round(c, d, e, f, g, h, a, b, w[14], _K[30]);
- round(b, c, d, e, f, g, h, a, w[15], _K[31]);
-
- w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
- w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]);
- w[2] = w[2] + s0(w[3]) + w[11] + s1(w[0]);
- w[3] = w[3] + s0(w[4]) + w[12] + s1(w[1]);
- w[4] = w[4] + s0(w[5]) + w[13] + s1(w[2]);
- w[5] = w[5] + s0(w[6]) + w[14] + s1(w[3]);
- w[6] = w[6] + s0(w[7]) + w[15] + s1(w[4]);
- w[7] = w[7] + s0(w[8]) + w[0] + s1(w[5]);
- w[8] = w[8] + s0(w[9]) + w[1] + s1(w[6]);
- w[9] = w[9] + s0(w[10]) + w[2] + s1(w[7]);
- w[10] = w[10] + s0(w[11]) + w[3] + s1(w[8]);
- w[11] = w[11] + s0(w[12]) + w[4] + s1(w[9]);
- w[12] = w[12] + s0(w[13]) + w[5] + s1(w[10]);
- w[13] = w[13] + s0(w[14]) + w[6] + s1(w[11]);
- w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
- w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
-
- round(a, b, c, d, e, f, g, h, w[0], _K[32]);
- round(h, a, b, c, d, e, f, g, w[1], _K[33]);
- round(g, h, a, b, c, d, e, f, w[2], _K[34]);
- round(f, g, h, a, b, c, d, e, w[3], _K[35]);
- round(e, f, g, h, a, b, c, d, w[4], _K[36]);
- round(d, e, f, g, h, a, b, c, w[5], _K[37]);
- round(c, d, e, f, g, h, a, b, w[6], _K[38]);
- round(b, c, d, e, f, g, h, a, w[7], _K[39]);
- round(a, b, c, d, e, f, g, h, w[8], _K[40]);
- round(h, a, b, c, d, e, f, g, w[9], _K[41]);
- round(g, h, a, b, c, d, e, f, w[10], _K[42]);
- round(f, g, h, a, b, c, d, e, w[11], _K[43]);
- round(e, f, g, h, a, b, c, d, w[12], _K[44]);
- round(d, e, f, g, h, a, b, c, w[13], _K[45]);
- round(c, d, e, f, g, h, a, b, w[14], _K[46]);
- round(b, c, d, e, f, g, h, a, w[15], _K[47]);
-
- w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
- w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]);
- w[2] = w[2] + s0(w[3]) + w[11] + s1(w[0]);
- w[3] = w[3] + s0(w[4]) + w[12] + s1(w[1]);
- w[4] = w[4] + s0(w[5]) + w[13] + s1(w[2]);
- w[5] = w[5] + s0(w[6]) + w[14] + s1(w[3]);
- w[6] = w[6] + s0(w[7]) + w[15] + s1(w[4]);
- w[7] = w[7] + s0(w[8]) + w[0] + s1(w[5]);
- w[8] = w[8] + s0(w[9]) + w[1] + s1(w[6]);
- w[9] = w[9] + s0(w[10]) + w[2] + s1(w[7]);
- w[10] = w[10] + s0(w[11]) + w[3] + s1(w[8]);
- w[11] = w[11] + s0(w[12]) + w[4] + s1(w[9]);
- w[12] = w[12] + s0(w[13]) + w[5] + s1(w[10]);
- w[13] = w[13] + s0(w[14]) + w[6] + s1(w[11]);
- w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
- w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
-
- round(a, b, c, d, e, f, g, h, w[0], _K[48]);
- round(h, a, b, c, d, e, f, g, w[1], _K[49]);
- round(g, h, a, b, c, d, e, f, w[2], _K[50]);
- round(f, g, h, a, b, c, d, e, w[3], _K[51]);
- round(e, f, g, h, a, b, c, d, w[4], _K[52]);
- round(d, e, f, g, h, a, b, c, w[5], _K[53]);
- round(c, d, e, f, g, h, a, b, w[6], _K[54]);
- round(b, c, d, e, f, g, h, a, w[7], _K[55]);
- round(a, b, c, d, e, f, g, h, w[8], _K[56]);
- round(h, a, b, c, d, e, f, g, w[9], _K[57]);
- round(g, h, a, b, c, d, e, f, w[10], _K[58]);
- round(f, g, h, a, b, c, d, e, w[11], _K[59]);
- round(e, f, g, h, a, b, c, d, w[12], _K[60]);
- round(d, e, f, g, h, a, b, c, w[13], _K[61]);
- round(c, d, e, f, g, h, a, b, w[14], _K[62]);
- round(b, c, d, e, f, g, h, a, w[15], _K[63]);
-
- digest[0] = tmp[0] + a;
- digest[1] = tmp[1] + b;
- digest[2] = tmp[2] + c;
- digest[3] = tmp[3] + d;
- digest[4] = tmp[4] + e;
- digest[5] = tmp[5] + f;
- digest[6] = tmp[6] + g;
- digest[7] = tmp[7] + h;
-}
-
-__device__ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, unsigned int digest[8])
-{
- unsigned int a, b, c, d, e, f, g, h;
- unsigned int w[16];
-
- // 0x03 || x or 0x02 || x
- w[0] = 0x02000000 | ((yParity & 1) << 24) | (x[0] >> 8);
-
- w[1] = (x[1] >> 8) | (x[0] << 24);
- w[2] = (x[2] >> 8) | (x[1] << 24);
- w[3] = (x[3] >> 8) | (x[2] << 24);
- w[4] = (x[4] >> 8) | (x[3] << 24);
- w[5] = (x[5] >> 8) | (x[4] << 24);
- w[6] = (x[6] >> 8) | (x[5] << 24);
- w[7] = (x[7] >> 8) | (x[6] << 24);
- w[8] = (x[7] << 24) | 0x00800000;
- w[15] = 33 * 8;
-
- a = _IV[0];
- b = _IV[1];
- c = _IV[2];
- d = _IV[3];
- e = _IV[4];
- f = _IV[5];
- g = _IV[6];
- h = _IV[7];
-
- round(a, b, c, d, e, f, g, h, w[0], _K[0]);
- round(h, a, b, c, d, e, f, g, w[1], _K[1]);
- round(g, h, a, b, c, d, e, f, w[2], _K[2]);
- round(f, g, h, a, b, c, d, e, w[3], _K[3]);
- round(e, f, g, h, a, b, c, d, w[4], _K[4]);
- round(d, e, f, g, h, a, b, c, w[5], _K[5]);
- round(c, d, e, f, g, h, a, b, w[6], _K[6]);
- round(b, c, d, e, f, g, h, a, w[7], _K[7]);
- round(a, b, c, d, e, f, g, h, w[8], _K[8]);
- round(h, a, b, c, d, e, f, g, 0, _K[9]);
- round(g, h, a, b, c, d, e, f, 0, _K[10]);
- round(f, g, h, a, b, c, d, e, 0, _K[11]);
- round(e, f, g, h, a, b, c, d, 0, _K[12]);
- round(d, e, f, g, h, a, b, c, 0, _K[13]);
- round(c, d, e, f, g, h, a, b, 0, _K[14]);
- round(b, c, d, e, f, g, h, a, w[15], _K[15]);
-
- w[0] = w[0] + s0(w[1]) + 0 + s1(0);
- w[1] = w[1] + s0(w[2]) + 0 + s1(w[15]);
- w[2] = w[2] + s0(w[3]) + 0 + s1(w[0]);
- w[3] = w[3] + s0(w[4]) + 0 + s1(w[1]);
- w[4] = w[4] + s0(w[5]) + 0 + s1(w[2]);
- w[5] = w[5] + s0(w[6]) + 0 + s1(w[3]);
- w[6] = w[6] + s0(w[7]) + w[15] + s1(w[4]);
- w[7] = w[7] + s0(w[8]) + w[0] + s1(w[5]);
- w[8] = w[8] + s0(0) + w[1] + s1(w[6]);
- w[9] = 0 + s0(0) + w[2] + s1(w[7]);
- w[10] = 0 + s0(0) + w[3] + s1(w[8]);
- w[11] = 0 + s0(0) + w[4] + s1(w[9]);
- w[12] = 0 + s0(0) + w[5] + s1(w[10]);
- w[13] = 0 + s0(0) + w[6] + s1(w[11]);
- w[14] = 0 + s0(w[15]) + w[7] + s1(w[12]);
- w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
-
- round(a, b, c, d, e, f, g, h, w[0], _K[16]);
- round(h, a, b, c, d, e, f, g, w[1], _K[17]);
- round(g, h, a, b, c, d, e, f, w[2], _K[18]);
- round(f, g, h, a, b, c, d, e, w[3], _K[19]);
- round(e, f, g, h, a, b, c, d, w[4], _K[20]);
- round(d, e, f, g, h, a, b, c, w[5], _K[21]);
- round(c, d, e, f, g, h, a, b, w[6], _K[22]);
- round(b, c, d, e, f, g, h, a, w[7], _K[23]);
- round(a, b, c, d, e, f, g, h, w[8], _K[24]);
- round(h, a, b, c, d, e, f, g, w[9], _K[25]);
- round(g, h, a, b, c, d, e, f, w[10], _K[26]);
- round(f, g, h, a, b, c, d, e, w[11], _K[27]);
- round(e, f, g, h, a, b, c, d, w[12], _K[28]);
- round(d, e, f, g, h, a, b, c, w[13], _K[29]);
- round(c, d, e, f, g, h, a, b, w[14], _K[30]);
- round(b, c, d, e, f, g, h, a, w[15], _K[31]);
-
- w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
- w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]);
- w[2] = w[2] + s0(w[3]) + w[11] + s1(w[0]);
- w[3] = w[3] + s0(w[4]) + w[12] + s1(w[1]);
- w[4] = w[4] + s0(w[5]) + w[13] + s1(w[2]);
- w[5] = w[5] + s0(w[6]) + w[14] + s1(w[3]);
- w[6] = w[6] + s0(w[7]) + w[15] + s1(w[4]);
- w[7] = w[7] + s0(w[8]) + w[0] + s1(w[5]);
- w[8] = w[8] + s0(w[9]) + w[1] + s1(w[6]);
- w[9] = w[9] + s0(w[10]) + w[2] + s1(w[7]);
- w[10] = w[10] + s0(w[11]) + w[3] + s1(w[8]);
- w[11] = w[11] + s0(w[12]) + w[4] + s1(w[9]);
- w[12] = w[12] + s0(w[13]) + w[5] + s1(w[10]);
- w[13] = w[13] + s0(w[14]) + w[6] + s1(w[11]);
- w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
- w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
-
- round(a, b, c, d, e, f, g, h, w[0], _K[32]);
- round(h, a, b, c, d, e, f, g, w[1], _K[33]);
- round(g, h, a, b, c, d, e, f, w[2], _K[34]);
- round(f, g, h, a, b, c, d, e, w[3], _K[35]);
- round(e, f, g, h, a, b, c, d, w[4], _K[36]);
- round(d, e, f, g, h, a, b, c, w[5], _K[37]);
- round(c, d, e, f, g, h, a, b, w[6], _K[38]);
- round(b, c, d, e, f, g, h, a, w[7], _K[39]);
- round(a, b, c, d, e, f, g, h, w[8], _K[40]);
- round(h, a, b, c, d, e, f, g, w[9], _K[41]);
- round(g, h, a, b, c, d, e, f, w[10], _K[42]);
- round(f, g, h, a, b, c, d, e, w[11], _K[43]);
- round(e, f, g, h, a, b, c, d, w[12], _K[44]);
- round(d, e, f, g, h, a, b, c, w[13], _K[45]);
- round(c, d, e, f, g, h, a, b, w[14], _K[46]);
- round(b, c, d, e, f, g, h, a, w[15], _K[47]);
-
-
- w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
- w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]);
- w[2] = w[2] + s0(w[3]) + w[11] + s1(w[0]);
- w[3] = w[3] + s0(w[4]) + w[12] + s1(w[1]);
- w[4] = w[4] + s0(w[5]) + w[13] + s1(w[2]);
- w[5] = w[5] + s0(w[6]) + w[14] + s1(w[3]);
- w[6] = w[6] + s0(w[7]) + w[15] + s1(w[4]);
- w[7] = w[7] + s0(w[8]) + w[0] + s1(w[5]);
- w[8] = w[8] + s0(w[9]) + w[1] + s1(w[6]);
- w[9] = w[9] + s0(w[10]) + w[2] + s1(w[7]);
- w[10] = w[10] + s0(w[11]) + w[3] + s1(w[8]);
- w[11] = w[11] + s0(w[12]) + w[4] + s1(w[9]);
- w[12] = w[12] + s0(w[13]) + w[5] + s1(w[10]);
- w[13] = w[13] + s0(w[14]) + w[6] + s1(w[11]);
- w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
- w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
-
- round(a, b, c, d, e, f, g, h, w[0], _K[48]);
- round(h, a, b, c, d, e, f, g, w[1], _K[49]);
- round(g, h, a, b, c, d, e, f, w[2], _K[50]);
- round(f, g, h, a, b, c, d, e, w[3], _K[51]);
- round(e, f, g, h, a, b, c, d, w[4], _K[52]);
- round(d, e, f, g, h, a, b, c, w[5], _K[53]);
- round(c, d, e, f, g, h, a, b, w[6], _K[54]);
- round(b, c, d, e, f, g, h, a, w[7], _K[55]);
- round(a, b, c, d, e, f, g, h, w[8], _K[56]);
- round(h, a, b, c, d, e, f, g, w[9], _K[57]);
- round(g, h, a, b, c, d, e, f, w[10], _K[58]);
- round(f, g, h, a, b, c, d, e, w[11], _K[59]);
- round(e, f, g, h, a, b, c, d, w[12], _K[60]);
- round(d, e, f, g, h, a, b, c, w[13], _K[61]);
- round(c, d, e, f, g, h, a, b, w[14], _K[62]);
- round(b, c, d, e, f, g, h, a, w[15], _K[63]);
-
- a += _IV[0];
- b += _IV[1];
- c += _IV[2];
- d += _IV[3];
- e += _IV[4];
- f += _IV[5];
- g += _IV[6];
- h += _IV[7];
-
- digest[0] = a;
- digest[1] = b;
- digest[2] = c;
- digest[3] = d;
- digest[4] = e;
- digest[5] = f;
- digest[6] = g;
- digest[7] = h;
-}
-#endif
\ No newline at end of file
diff --git a/cudaUtil/Makefile b/cudaUtil/Makefile
deleted file mode 100644
index f443b14..0000000
--- a/cudaUtil/Makefile
+++ /dev/null
@@ -1,13 +0,0 @@
-NAME=cudautil
-SRC=$(wildcard *.cpp)
-OBJS=$(SRC:.cpp=.o)
-
-all: ${SRC}
- for file in ${SRC} ; do\
- ${CXX} -c $$file ${INCLUDE} -I${CUDA_INCLUDE} ${CXXFLAGS};\
- done
- mkdir -p ${LIBDIR}
- ar rvs ${LIBDIR}/lib$(NAME).a ${OBJS}
-
-clean:
- rm -rf *.o
diff --git a/cudaUtil/cudaUtil.cpp b/cudaUtil/cudaUtil.cpp
deleted file mode 100644
index 349a604..0000000
--- a/cudaUtil/cudaUtil.cpp
+++ /dev/null
@@ -1,92 +0,0 @@
-#include "cudaUtil.h"
-
-
-cuda::CudaDeviceInfo cuda::getDeviceInfo(int device)
-{
- cuda::CudaDeviceInfo devInfo;
-
- cudaDeviceProp properties;
- cudaError_t err = cudaSuccess;
-
- err = cudaSetDevice(device);
-
- if(err) {
- throw cuda::CudaException(err);
- }
-
- err = cudaGetDeviceProperties(&properties, device);
-
- if(err) {
- throw cuda::CudaException(err);
- }
-
- devInfo.id = device;
- devInfo.major = properties.major;
- devInfo.minor = properties.minor;
- devInfo.mpCount = properties.multiProcessorCount;
- devInfo.mem = properties.totalGlobalMem;
- devInfo.name = std::string(properties.name);
-
- int cores = 0;
- switch(devInfo.major) {
- case 1:
- cores = 8;
- break;
- case 2:
- if(devInfo.minor == 0) {
- cores = 32;
- } else {
- cores = 48;
- }
- break;
- case 3:
- cores = 192;
- break;
- case 5:
- cores = 128;
- break;
- case 6:
- if(devInfo.minor == 1 || devInfo.minor == 2) {
- cores = 128;
- } else {
- cores = 64;
- }
- break;
- case 7:
- cores = 64;
- break;
- default:
- cores = 8;
- break;
- }
- devInfo.cores = cores;
-
- return devInfo;
-}
-
-
-std::vector cuda::getDevices()
-{
- int count = getDeviceCount();
-
- std::vector devList;
-
- for(int device = 0; device < count; device++) {
- devList.push_back(getDeviceInfo(device));
- }
-
- return devList;
-}
-
-int cuda::getDeviceCount()
-{
- int count = 0;
-
- cudaError_t err = cudaGetDeviceCount(&count);
-
- if(err) {
- throw cuda::CudaException(err);
- }
-
- return count;
-}
\ No newline at end of file
diff --git a/cudaUtil/cudaUtil.h b/cudaUtil/cudaUtil.h
deleted file mode 100644
index eaf7eab..0000000
--- a/cudaUtil/cudaUtil.h
+++ /dev/null
@@ -1,42 +0,0 @@
-#ifndef _CUDA_UTIL_H
-#define _CUDA_UTIL_H
-
-#include
-#include
-
-#include
-#include
-
-namespace cuda {
- typedef struct {
-
- int id;
- int major;
- int minor;
- int mpCount;
- int cores;
- uint64_t mem;
- std::string name;
-
- }CudaDeviceInfo;
-
- class CudaException
- {
- public:
- cudaError_t error;
- std::string msg;
-
- CudaException(cudaError_t err)
- {
- this->error = err;
- this->msg = std::string(cudaGetErrorString(err));
- }
- };
-
- CudaDeviceInfo getDeviceInfo(int device);
-
- std::vector getDevices();
-
- int getDeviceCount();
-}
-#endif
\ No newline at end of file
diff --git a/cudaUtil/cudaUtil.vcxproj b/cudaUtil/cudaUtil.vcxproj
deleted file mode 100644
index 24d5ccb..0000000
--- a/cudaUtil/cudaUtil.vcxproj
+++ /dev/null
@@ -1,160 +0,0 @@
-
-
-
-
- Debug
- Win32
-
-
- Release
- Win32
-
-
- Debug
- x64
-
-
- Release
- x64
-
-
-
-
-
-
-
-
-
- {EADAAA54-E304-4656-8263-E5E688FF323D}
- Win32Proj
- cudaUtil
- 10.0
-
-
-
- StaticLibrary
- true
- v141
- Unicode
-
-
- StaticLibrary
- false
- v141
- true
- Unicode
-
-
- StaticLibrary
- true
- v142
- Unicode
-
-
- StaticLibrary
- false
- v142
- true
- Unicode
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Level3
- Disabled
- _CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)
- C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.2\include;%(AdditionalIncludeDirectories)
-
-
- Windows
-
-
- C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\lib\Win32;%(AdditionalLibraryDirectories)
- cuda.lib;cudart.lib;%(AdditionalDependencies)
-
-
-
-
- NotUsing
- Level3
- Disabled
- _CRT_SECURE_NO_WARNINGS;_DEBUG;_LIB;%(PreprocessorDefinitions)
- $(CUDA_INCLUDE);%(AdditionalIncludeDirectories)
-
-
- Windows
-
-
- %(AdditionalLibraryDirectories)
- %(AdditionalDependencies)
-
-
-
-
- Level3
-
-
- MaxSpeed
- true
- true
- _CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)
- C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.2\include;%(AdditionalIncludeDirectories)
-
-
- Windows
- true
- true
-
-
- C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\lib\Win32;%(AdditionalLibraryDirectories)
- cuda.lib;cudart.lib;%(AdditionalDependencies)
-
-
-
-
- Level3
- NotUsing
- MaxSpeed
- true
- true
- _CRT_SECURE_NO_WARNINGS;NDEBUG;_LIB;%(PreprocessorDefinitions)
- $(CUDA_INCLUDE);%(AdditionalIncludeDirectories)
-
-
- Windows
- true
- true
-
-
- %(AdditionalLibraryDirectories)
- %(AdditionalDependencies)
-
-
-
-
-
-
\ No newline at end of file
diff --git a/embedcl/embedcl.vcxproj b/embedcl/embedcl.vcxproj
index 8d2dd7f..3013aec 100644
--- a/embedcl/embedcl.vcxproj
+++ b/embedcl/embedcl.vcxproj
@@ -28,26 +28,26 @@
Application
true
- v141
+ ClangCl
MultiByte
Application
false
- v141
+ ClangCl
true
MultiByte
Application
true
- v142
+ ClangCl
MultiByte
Application
false
- v142
+ ClangCL
true
MultiByte
diff --git a/secp256k1lib/secp256k1.cpp b/secp256k1lib/secp256k1.cpp
index 6107ea0..e24fbe3 100644
--- a/secp256k1lib/secp256k1.cpp
+++ b/secp256k1lib/secp256k1.cpp
@@ -1,39 +1,36 @@
-#include
-#include
-#include
-#include"CryptoUtil.h"
+#include
+#include
+#include
+#include "CryptoUtil.h"
#include "secp256k1.h"
-
using namespace secp256k1;
static uint256 _ONE(1);
static uint256 _ZERO;
static crypto::Rng _rng;
-static inline void addc(unsigned int a, unsigned int b, unsigned int carryIn, unsigned int &sum, int &carryOut)
+static inline void addc(unsigned int a, unsigned int b, int& carry, unsigned int &sum)
{
- uint64_t sum64 = (uint64_t)a + b + carryIn;
+ uint64_t sum64 = (uint64_t)a + b + carry;
sum = (unsigned int)sum64;
- carryOut = (int)(sum64 >> 32) & 1;
+ carry = (int)(sum64 >> 32) & 1;
}
-static inline void subc(unsigned int a, unsigned int b, unsigned int borrowIn, unsigned int &diff, int &borrowOut)
+static inline void subc(unsigned int a, unsigned int b, int& borrow, unsigned int &diff)
{
- uint64_t diff64 = (uint64_t)a - b - borrowIn;
+ uint64_t diff64 = (uint64_t)a - b - borrow;
diff = (unsigned int)diff64;
- borrowOut = (int)((diff64 >> 32) & 1);
+ borrow = (int)((diff64 >> 32) & 1);
}
-
-
-static bool lessThanEqualTo(const unsigned int *a, const unsigned int *b, int len)
+static bool lessThanEqualTo(const unsigned int *a, const unsigned int *b)
{
- for(int i = len - 1; i >= 0; i--) {
+ for(int i = 7; i >= 0; i--) {
if(a[i] < b[i]) {
// is greater than
return true;
@@ -68,7 +65,7 @@ static int add(const unsigned int *a, const unsigned int *b, unsigned int *c, in
int carry = 0;
for(int i = 0; i < len; i++) {
- addc(a[i], b[i], carry, c[i], carry);
+ addc(a[i], b[i], carry, c[i]);
}
return carry;
@@ -79,7 +76,7 @@ static int sub(const unsigned int *a, const unsigned int *b, unsigned int *c, in
int borrow = 0;
for(int i = 0; i < len; i++) {
- subc(a[i], b[i], borrow, c[i], borrow);
+ subc(a[i], b[i], borrow, c[i]);
}
return borrow & 1;
@@ -386,7 +383,7 @@ uint256 secp256k1::invModP(const uint256 &x)
}
}
- if(lessThanEqualTo(v.v, u.v, 8)) {
+ if(lessThanEqualTo(v.v, u.v)) {
sub(u.v, v.v, u.v, 8);
// x1 = x1 - x2
@@ -613,7 +610,7 @@ uint256 secp256k1::multiplyModN(const uint256 &a, const uint256 &b)
return r;
}
-std::string secp256k1::uint256::toString(int base)
+std::string secp256k1::uint256::toString()
{
std::string s = "";
@@ -731,17 +728,6 @@ ecpoint secp256k1::multiplyPoint(const uint256 &k, const ecpoint &p)
return sum;
}
-uint256 generatePrivateKey()
-{
- uint256 k;
-
- for(int i = 0; i < 8; i++) {
- k.v[i] = ((unsigned int)rand() | ((unsigned int)rand()) << 17);
- }
-
- return k;
-}
-
bool secp256k1::pointExists(const ecpoint &p)
{
uint256 y = multiplyModP(p.y, p.y);
@@ -767,14 +753,15 @@ static void bulkInversionModP(std::vector &in)
uint256 inverse = secp256k1::invModP(total);
- for(int i = (int)in.size() - 1; i >= 0; i--) {
+ for(size_t i = in.size() - 1;; i--) {
- if(i > 0) {
+ if(i != 0) {
uint256 newValue = secp256k1::multiplyModP(products[i - 1], inverse);
inverse = multiplyModP(inverse, in[i]);
in[i] = newValue;
} else {
- in[i] = inverse;
+ in[0] = inverse;
+ break;
}
}
}
@@ -792,7 +779,7 @@ void secp256k1::generateKeyPairsBulk(unsigned int count, const ecpoint &basePoin
void secp256k1::generateKeyPairsBulk(const ecpoint &basePoint, std::vector &privKeys, std::vector &pubKeysOut)
{
- unsigned int count = (unsigned int)privKeys.size();
+ size_t count = privKeys.size();
//privKeysOut.clear();
pubKeysOut.clear();
@@ -801,12 +788,14 @@ void secp256k1::generateKeyPairsBulk(const ecpoint &basePoint, std::vector table;
table.push_back(basePoint);
- for(int i = 1; i < 256; i++) {
+ for(size_t i = 1; i < 256; i++) {
ecpoint p = doublePoint(table[i-1]);
+#ifdef DEBUG
if(!pointExists(p)) {
throw std::string("Point does not exist!");
}
+#endif
table.push_back(p);
}
@@ -860,13 +849,18 @@ void secp256k1::generateKeyPairsBulk(const ecpoint &basePoint, std::vector
#include
@@ -27,7 +27,7 @@ namespace secp256k1 {
std::string t = s;
// 0x prefix
- if(t.length() >= 2 && (t[0] == '0' && t[1] == 'x' || t[1] == 'X')) {
+ if(t.length() >= 2 && (t[0] == '0' && (t[1] == 'x' || t[1] == 'X'))) {
t = t.substr(2);
}
@@ -41,7 +41,7 @@ namespace secp256k1 {
}
// Verify only valid hex characters
- for(int i = 0; i < (int)t.length(); i++) {
+ for(size_t i = 0, tl = t.length(); i < tl; i++) {
if(!((t[i] >= 'a' && t[i] <= 'f') || (t[i] >= 'A' && t[i] <= 'F') || (t[i] >= '0' && t[i] <= '9'))) {
throw std::string("Incorrect hex formatting");
}
@@ -61,7 +61,7 @@ namespace secp256k1 {
int j = 0;
for(int i = len - 8; i >= 0; i-= 8) {
- std::string sub = t.substr(i, 8);
+ std::string sub = t.substr((unsigned long long)i, 8);
uint32_t val;
if(sscanf(sub.c_str(), "%x", &val) != 1) {
throw std::string("Incorrect hex formatting");
@@ -261,12 +261,13 @@ namespace secp256k1 {
return (this->v[0] & 1) == 0;
}
- std::string toString(int base = 16);
+ std::string toString();
uint64_t toUint64()
{
return ((uint64_t)this->v[1] << 32) | v[0];
}
+
};
const unsigned int _POINT_AT_INFINITY_WORDS[8] = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF };
@@ -290,10 +291,10 @@ namespace secp256k1 {
this->y = uint256(_POINT_AT_INFINITY_WORDS);
}
- ecpoint(const uint256 &x, const uint256 &y)
+ ecpoint(const uint256 &pX, const uint256 &pY)
{
- this->x = x;
- this->y = y;
+ this->x = pX;
+ this->y = pY;
}
ecpoint(const ecpoint &p)
@@ -365,7 +366,8 @@ namespace secp256k1 {
void generateKeyPairsBulk(unsigned int count, const ecpoint &basePoint, std::vector &privKeysOut, std::vector &pubKeysOut);
void generateKeyPairsBulk(const ecpoint &basePoint, std::vector &privKeys, std::vector &pubKeysOut);
+ uint256 generatePrivateKey();
ecpoint parsePublicKey(const std::string &pubKeyString);
}
-#endif
\ No newline at end of file
+#endif
diff --git a/secp256k1lib/secp256k1lib.vcxproj b/secp256k1lib/secp256k1lib.vcxproj
index d9270a9..21aa7f3 100644
--- a/secp256k1lib/secp256k1lib.vcxproj
+++ b/secp256k1lib/secp256k1lib.vcxproj
@@ -5,6 +5,14 @@
Debug
Win32
+
+ Performance Release
+ Win32
+
+
+ Performance Release
+ x64
+
Release
Win32
@@ -33,32 +41,46 @@
{BFF4B5FE-C2C5-4384-8941-CD6CB29E78C6}
Win32Proj
secp256k1lib
- 10.0
+ 10.0.19041.0
StaticLibrary
true
- v141
+ ClangCl
+ Unicode
+
+
+ StaticLibrary
+ false
+ ClangCl
Unicode
StaticLibrary
false
- v141
+ ClangCl
true
Unicode
StaticLibrary
true
- v142
+ ClangCl
NotSet
+
+ StaticLibrary
+ false
+ ClangCl
+ NotSet
+ true
+ x64
+
StaticLibrary
false
- v142
+ ClangCL
true
NotSet
@@ -71,6 +93,10 @@
+
+
+
+
@@ -79,6 +105,10 @@
+
+
+
+
@@ -98,10 +128,23 @@
Windows
+
+
+
+
+ Level3
+ Disabled
+ _CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)
+ $(SolutionDir)CryptoUtil;%(AdditionalIncludeDirectories)
+
+
+ Windows
+
+
NotUsing
- Level3
+ EnableAllWarnings
Disabled
_CRT_SECURE_NO_WARNINGS;_DEBUG;_LIB;%(PreprocessorDefinitions)
$(SolutionDir)CryptoUtil;%(AdditionalIncludeDirectories)
@@ -110,6 +153,25 @@
Windows
+
+
+ NotUsing
+ Level3
+ _CRT_SECURE_NO_WARNINGS;NDEBUG;_LIB;%(PreprocessorDefinitions)
+ $(SolutionDir)CryptoUtil;%(AdditionalIncludeDirectories)
+ None
+ MaxSpeed
+ AnySuitable
+ true
+ Speed
+ true
+ true
+ true
+
+
+ Windows
+
+
Level3
diff --git a/util/util.cpp b/util/util.cpp
index b48750b..c7bbdf2 100644
--- a/util/util.cpp
+++ b/util/util.cpp
@@ -8,7 +8,7 @@
#include"util.h"
#ifdef _WIN32
-#include
+#include
#else
#include
#include
@@ -295,4 +295,4 @@ namespace util {
return s.substr(left, right - left + 1);
}
-}
\ No newline at end of file
+}
diff --git a/util/util.h b/util/util.h
index 83b81b2..5a0b4e3 100644
--- a/util/util.h
+++ b/util/util.h
@@ -1,5 +1,5 @@
-#ifndef _UTIL_H
-#define _UTIL_H
+#ifndef UTIL_H
+#define UTIL_H
#include
#include
@@ -27,6 +27,8 @@ std::string formatSeconds(unsigned int seconds);
uint32_t parseUInt32(std::string s);
uint64_t parseUInt64(std::string s);
bool isHex(const std::string &s);
+
+long getFileSize(const std::string& fileName);
bool appendToFile(const std::string &fileName, const std::string &s);
bool readLinesFromStream(std::istream &in, std::vector &lines);
bool readLinesFromStream(const std::string &fileName, std::vector &lines);
@@ -43,4 +45,4 @@ std::string trim(const std::string &s, char c=' ');
}
-#endif
\ No newline at end of file
+#endif
diff --git a/util/util.vcxproj b/util/util.vcxproj
index 687925a..5c7800e 100644
--- a/util/util.vcxproj
+++ b/util/util.vcxproj
@@ -5,6 +5,14 @@
Debug
Win32
+
+ Performance Release
+ Win32
+
+
+ Performance Release
+ x64
+
Release
Win32
@@ -28,32 +36,46 @@
{93B89BF6-32B9-4EBA-AA44-BCFEC4836B76}
Win32Proj
util
- 10.0
+ 10.0.19041.0
StaticLibrary
true
- v141
+ ClangCl
+ Unicode
+
+
+ StaticLibrary
+ false
+ ClangCl
Unicode
StaticLibrary
false
- v141
+ ClangCl
true
Unicode
StaticLibrary
true
- v142
+ ClangCl
+ Unicode
+
+
+ StaticLibrary
+ false
+ ClangCl
Unicode
+ true
+ x64
StaticLibrary
false
- v142
+ ClangCL
true
Unicode
@@ -66,6 +88,10 @@
+
+
+
+
@@ -74,6 +100,10 @@
+
+
+
+
@@ -92,10 +122,22 @@
Windows
+
+
+
+
+ Level3
+ Disabled
+ _CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)
+
+
+ Windows
+
+
NotUsing
- Level3
+ EnableAllWarnings
Disabled
_CRT_SECURE_NO_WARNINGS;_DEBUG;_LIB;%(PreprocessorDefinitions)
@@ -103,6 +145,24 @@
Windows
+
+
+ NotUsing
+ Level3
+ _CRT_SECURE_NO_WARNINGS;NDEBUG;_LIB;%(PreprocessorDefinitions)
+ None
+ MaxSpeed
+ AnySuitable
+ true
+ Speed
+ true
+ true
+ true
+
+
+ Windows
+
+
Level3