Skip to content

Commit

Permalink
Fix for intensity > 15
Browse files Browse the repository at this point in the history
  • Loading branch information
lasybear committed May 26, 2014
1 parent aa75e23 commit 5136036
Show file tree
Hide file tree
Showing 21 changed files with 10,104 additions and 164 deletions.
1 change: 1 addition & 0 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ sgminer_SOURCES += animecoin.c animecoin.h
sgminer_SOURCES += groestlcoin.c groestlcoin.h
sgminer_SOURCES += sifcoin.c sifcoin.h
sgminer_SOURCES += twecoin.c twecoin.h
sgminer_SOURCES += marucoin.c marucoin.h
sgminer_SOURCES += kernel/*.cl

bin_SCRIPTS = $(top_srcdir)/kernel/*.cl
Expand Down
9 changes: 5 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,7 @@ License: GPLv3. See `COPYING` for details.
## Mod x11 (separate kernels)

0. Use "kernel" : "x11mod" in config file or -k x11mod via command line.
1. Do not use thread concurrency higher than 15.
2. In case of instability please set a little bit lower engine frequency than a original sgminer.
3. Tested only on Ubuntu x64 and BAMT.
4. Tested only on single R9 280x and rig of 5 x R9 270x.
1. In case of instability please set a little bit lower engine frequency than a original sgminer.

My experience:
MSI R9 280x 3.4 MH/s
Expand Down Expand Up @@ -53,6 +50,10 @@ NO HIDDEN MININGS )

####If you satisfied, please donate

####Remember remember the 5th of November
####BTC: 1AiCRMxgf1ptVQwx6hDuKMu4f7F27QmJC2

####lasybear
####BTC: 1LykvDuT7PCJ2xgT513RsyWQSZ9pbrDFa6
####MRO: 47mfp5AVx6R1Gdc6qPZeV2Ceq1Aw3HFXLRHgjW2pHszVRLzg5pe4tNPMz1NSmAYADGFmUDthoLzyBDBGEz7AoFya9QZKWcu
####LTC: LYpe5amuVYdSKmi3D5HCoKMeBd6yty8xVi
Expand Down
1 change: 1 addition & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,7 @@ AC_DEFINE_UNQUOTED([ANIMECOIN_KERNNAME], ["animecoin"], [Filename for AnimeCoin
AC_DEFINE_UNQUOTED([GROESTLCOIN_KERNNAME], ["groestlcoin"], [Filename for GroestlCoin optimised kernel])
AC_DEFINE_UNQUOTED([SIFCOIN_KERNNAME], ["sifcoin"], [Filename for Sifcoin optimised kernel])
AC_DEFINE_UNQUOTED([TWECOIN_KERNNAME], ["twecoin"], [Filename for Twecoin optimised kernel])
AC_DEFINE_UNQUOTED([MARUCOIN_KERNNAME], ["marucoin"], [Filename for MaruCoin optimised kernel])
AC_DEFINE_UNQUOTED([X11MOD_KERNNAME], ["x11mod"], [Filename for X11mod optimised kernel])

AC_SUBST(OPENCL_LIBS)
Expand Down
157 changes: 79 additions & 78 deletions driver-opencl.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,24 +34,24 @@
#include "adl.h"
#include "util.h"

#define CL_CHECK(_expr) \
do { \
cl_int _err = _expr; \
if (_err == CL_SUCCESS) \
break; \
applog(LOG_ERR, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
abort(); \
#define CL_CHECK(_expr) \
do { \
cl_int _err = _expr; \
if (_err == CL_SUCCESS) \
break; \
applog(LOG_ERR, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
abort(); \
} while (0)

#define CL_CHECK_ERR(_expr) \
({ \
cl_int _err = CL_INVALID_VALUE; \
typeof(_expr) _ret = _expr; \
if (_err != CL_SUCCESS) { \
applog(LOG_ERR, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
abort(); \
} \
_ret; \
#define CL_CHECK_ERR(_expr) \
({ \
cl_int _err = CL_INVALID_VALUE; \
typeof(_expr) _ret = _expr; \
if (_err != CL_SUCCESS) { \
applog(LOG_ERR, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
abort(); \
} \
_ret; \
})

/* TODO: cleanup externals ********************/
Expand Down Expand Up @@ -229,6 +229,8 @@ static enum cl_kernels select_kernel(char *arg)
return KL_PSW;
if (!strcmp(arg, DARKCOIN_KERNNAME))
return KL_DARKCOIN;
if (!strcmp(arg, X11MOD_KERNNAME))
return KL_X11MOD;
if (!strcmp(arg, QUBITCOIN_KERNNAME))
return KL_QUBITCOIN;
if (!strcmp(arg, QUARKCOIN_KERNNAME))
Expand All @@ -247,8 +249,8 @@ static enum cl_kernels select_kernel(char *arg)
return KL_SIFCOIN;
if (!strcmp(arg, TWECOIN_KERNNAME))
return KL_TWECOIN;
if (!strcmp(arg, X11MOD_KERNNAME))
return KL_X11MOD;
if (!strcmp(arg, MARUCOIN_KERNNAME))
return KL_MARUCOIN;

return KL_NONE;
}
Expand All @@ -266,9 +268,7 @@ char *set_kernel(char *arg)
if (kern == KL_NONE)
return "Invalid parameter to set_kernel";
gpus[device++].kernel = kern;
if (kern >= KL_FUGUECOIN)
dm_mode = DM_FUGUECOIN;
else if (kern >= KL_DARKCOIN)
if (kern >= KL_DARKCOIN)
dm_mode = DM_BITCOIN;
else if(kern >= KL_QUARKCOIN)
dm_mode = DM_QUARKCOIN;
Expand Down Expand Up @@ -1105,7 +1105,6 @@ static cl_int queue_sph_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_unus
return status;
}


static cl_int queue_x11mod_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_unused cl_uint threads)
{
unsigned char *midstate = blk->work->midstate;
Expand Down Expand Up @@ -1169,6 +1168,7 @@ static void set_threads_hashes(unsigned int vectors, unsigned int compute_shader
threads = minthreads;
}
}

*globalThreads = threads;
*hashes = threads * vectors;
}
Expand Down Expand Up @@ -1465,8 +1465,12 @@ static bool opencl_thread_prepare(struct thr_info *thr)
case KL_TWECOIN:
cgpu->kname = TWECOIN_KERNNAME;
break;
case KL_MARUCOIN:
cgpu->kname = MARUCOIN_KERNNAME;
break;
case KL_X11MOD:
cgpu->kname = X11MOD_KERNNAME;
break;
default:
break;
}
Expand Down Expand Up @@ -1495,6 +1499,9 @@ static bool opencl_thread_init(struct thr_info *thr)
}

switch (clState->chosen_kernel) {
case KL_X11MOD:
thrdata->queue_kernel_parameters = &queue_x11mod_kernel;
break;
case KL_ALEXKARNEW:
case KL_ALEXKAROLD:
case KL_CKOLIVAS:
Expand All @@ -1512,17 +1519,16 @@ static bool opencl_thread_init(struct thr_info *thr)
case KL_GROESTLCOIN:
case KL_SIFCOIN:
case KL_TWECOIN:
case KL_MARUCOIN:
thrdata->queue_kernel_parameters = &queue_sph_kernel;
break;
case KL_X11MOD:
thrdata->queue_kernel_parameters = &queue_x11mod_kernel;
break;
default:
applog(LOG_ERR, "Failed to choose kernel in opencl_thread_init");
break;
}

thrdata->res = calloc(buffersize, 1);

if (!thrdata->res) {
free(thrdata);
applog(LOG_ERR, "Failed to calloc in opencl_thread_init");
Expand Down Expand Up @@ -1553,11 +1559,12 @@ static bool opencl_prepare_work(struct thr_info __maybe_unused *thr, struct work
extern int opt_dynamic_interval;

#define CL_ENQUEUE_KERNEL(KL, GWO) \
status = clEnqueueNDRangeKernel(clState->commandQueue, clState->kernel_##KL, 1, GWO, globalThreads, localThreads, 0, NULL, NULL); \
if (unlikely(status != CL_SUCCESS)) { \
applog(LOG_ERR, "Error %d: Enqueueing kernel #KL onto command queue. (clEnqueueNDRangeKernel)", status); \
return -1; \
}
status = clEnqueueNDRangeKernel(clState->commandQueue, clState->kernel_##KL, 1, GWO, globalThreads, localThreads, 0, NULL, NULL); \
if (unlikely(status != CL_SUCCESS)) { \
applog(LOG_ERR, "Error %d: Enqueueing kernel #KL onto command queue. (clEnqueueNDRangeKernel)", status); \
return -1; \
}


static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
int64_t __maybe_unused max_nonce)
Expand All @@ -1570,7 +1577,7 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
const int dynamic_us = opt_dynamic_interval * 1000;

cl_int status;
size_t globalThreads[1] = { 1 };
size_t globalThreads[1];
size_t localThreads[1] = { clState->wsize };
int64_t hashes;
int found = FOUND;
Expand Down Expand Up @@ -1607,63 +1614,56 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,

if (clState->chosen_kernel == KL_X11MOD) {
if (clState->goffset) {
size_t global_work_offset[1];
global_work_offset[0] = work->blk.nonce;

CL_ENQUEUE_KERNEL(blake, global_work_offset);
CL_ENQUEUE_KERNEL(bmw, global_work_offset);
CL_ENQUEUE_KERNEL(groestl, global_work_offset);
CL_ENQUEUE_KERNEL(skein, global_work_offset);
CL_ENQUEUE_KERNEL(jh, global_work_offset);
CL_ENQUEUE_KERNEL(keccak, global_work_offset);
CL_ENQUEUE_KERNEL(luffa, global_work_offset);
CL_ENQUEUE_KERNEL(cubehash, global_work_offset);
CL_ENQUEUE_KERNEL(shavite, global_work_offset);
CL_ENQUEUE_KERNEL(simd, global_work_offset)
CL_ENQUEUE_KERNEL(echo, global_work_offset);
}
else {
CL_ENQUEUE_KERNEL(blake, NULL);
CL_ENQUEUE_KERNEL(bmw, NULL);
CL_ENQUEUE_KERNEL(groestl, NULL);
CL_ENQUEUE_KERNEL(skein, NULL);
CL_ENQUEUE_KERNEL(jh, NULL);
CL_ENQUEUE_KERNEL(keccak, NULL);
CL_ENQUEUE_KERNEL(luffa, NULL);
CL_ENQUEUE_KERNEL(cubehash, NULL);
CL_ENQUEUE_KERNEL(shavite, NULL);
CL_ENQUEUE_KERNEL(simd, NULL)
CL_ENQUEUE_KERNEL(echo, NULL);
}

status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
buffersize, thrdata->res, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS)) {
applog(LOG_ERR, "Error: clEnqueueReadBuffer failed error %d. (clEnqueueReadBuffer)", status);
return -1;
size_t global_work_offset[1];
global_work_offset[0] = work->blk.nonce;

CL_ENQUEUE_KERNEL(blake, global_work_offset);
CL_ENQUEUE_KERNEL(bmw, global_work_offset);
CL_ENQUEUE_KERNEL(groestl, global_work_offset);
CL_ENQUEUE_KERNEL(skein, global_work_offset);
CL_ENQUEUE_KERNEL(jh, global_work_offset);
CL_ENQUEUE_KERNEL(keccak, global_work_offset);
CL_ENQUEUE_KERNEL(luffa, global_work_offset);
CL_ENQUEUE_KERNEL(cubehash, global_work_offset);
CL_ENQUEUE_KERNEL(shavite, global_work_offset);
CL_ENQUEUE_KERNEL(simd, global_work_offset)
CL_ENQUEUE_KERNEL(echo, global_work_offset);
}
else {
CL_ENQUEUE_KERNEL(blake, NULL);
CL_ENQUEUE_KERNEL(bmw, NULL);
CL_ENQUEUE_KERNEL(groestl, NULL);
CL_ENQUEUE_KERNEL(skein, NULL);
CL_ENQUEUE_KERNEL(jh, NULL);
CL_ENQUEUE_KERNEL(keccak, NULL);
CL_ENQUEUE_KERNEL(luffa, NULL);
CL_ENQUEUE_KERNEL(cubehash, NULL);
CL_ENQUEUE_KERNEL(shavite, NULL);
CL_ENQUEUE_KERNEL(simd, NULL)
CL_ENQUEUE_KERNEL(echo, NULL);
}
}
else {
if (clState->goffset) {
size_t global_work_offset[1];
size_t global_work_offset[1];

global_work_offset[0] = work->blk.nonce;
status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, global_work_offset,
globalThreads, localThreads, 0, NULL, NULL);
global_work_offset[0] = work->blk.nonce;
status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, global_work_offset,
globalThreads, localThreads, 0, NULL, NULL);
} else
status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, NULL,
status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, NULL,
globalThreads, localThreads, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS)) {
applog(LOG_ERR, "Error %d: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)", status);
return -1;
applog(LOG_ERR, "Error %d: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)", status);
return -1;
}
}

status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
buffersize, thrdata->res, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS)) {
applog(LOG_ERR, "Error: clEnqueueReadBuffer failed error %d. (clEnqueueReadBuffer)", status);
return -1;
}
status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
buffersize, thrdata->res, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS)) {
applog(LOG_ERR, "Error: clEnqueueReadBuffer failed error %d. (clEnqueueReadBuffer)", status);
return -1;
}

/* The amount of work scanned can fluctuate when intensity changes
Expand Down Expand Up @@ -1714,6 +1714,7 @@ static void opencl_thread_shutdown(struct thr_info *thr)
else {
clReleaseKernel(clState->kernel);
}

clReleaseProgram(clState->program);
clReleaseCommandQueue(clState->commandQueue);
clReleaseContext(clState->context);
Expand Down
2 changes: 1 addition & 1 deletion findnonce.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#define BUFFERSIZE (sizeof(uint32_t) * MAXBUFFERS)
#define FOUND (0xFF)

#define THASHBUFSIZE (8 * 16 * 16384)
#define THASHBUFSIZE (8 * 16 * 4194304)

extern void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data);
extern void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t *res);
Expand Down
56 changes: 56 additions & 0 deletions kernel/fugue.cl
Original file line number Diff line number Diff line change
Expand Up @@ -614,4 +614,60 @@ __constant static const sph_u32 mixtab3[] = {
SMIX(S06, S07, S08, S09); \
} while (0)

#define ROR3 do { \
sph_u32 B33 = S33, B34 = S34, B35 = S35; \
S35 = S32; S34 = S31; S33 = S30; S32 = S29; S31 = S28; S30 = S27; S29 = S26; S28 = S25; S27 = S24; \
S26 = S23; S25 = S22; S24 = S21; S23 = S20; S22 = S19; S21 = S18; S20 = S17; S19 = S16; S18 = S15; \
S17 = S14; S16 = S13; S15 = S12; S14 = S11; S13 = S10; S12 = S09; S11 = S08; S10 = S07; S09 = S06; \
S08 = S05; S07 = S04; S06 = S03; S05 = S02; S04 = S01; S03 = S00; S02 = B35; S01 = B34; S00 = B33; \
} while (0)

#define ROR8 do { \
sph_u32 B28 = S28, B29 = S29, B30 = S30, B31 = S31, B32 = S32, B33 = S33, B34 = S34, B35 = S35; \
S35 = S27; S34 = S26; S33 = S25; S32 = S24; S31 = S23; S30 = S22; S29 = S21; S28 = S20; S27 = S19; \
S26 = S18; S25 = S17; S24 = S16; S23 = S15; S22 = S14; S21 = S13; S20 = S12; S19 = S11; S18 = S10; \
S17 = S09; S16 = S08; S15 = S07; S14 = S06; S13 = S05; S12 = S04; S11 = S03; S10 = S02; S09 = S01; \
S08 = S00; S07 = B35; S06 = B34; S05 = B33; S04 = B32; S03 = B31; S02 = B30; S01 = B29; S00 = B28; \
} while (0)

#define ROR9 do { \
sph_u32 B27 = S27, B28 = S28, B29 = S29, B30 = S30, B31 = S31, B32 = S32, B33 = S33, B34 = S34, B35 = S35; \
S35 = S26; S34 = S25; S33 = S24; S32 = S23; S31 = S22; S30 = S21; S29 = S20; S28 = S19; S27 = S18; \
S26 = S17; S25 = S16; S24 = S15; S23 = S14; S22 = S13; S21 = S12; S20 = S11; S19 = S10; S18 = S09; \
S17 = S08; S16 = S07; S15 = S06; S14 = S05; S13 = S04; S12 = S03; S11 = S02; S10 = S01; S09 = S00; \
S08 = B35; S07 = B34; S06 = B33; S05 = B32; S04 = B31; S03 = B30; S02 = B29; S01 = B28; S00 = B27; \
} while (0)

#define FUGUE512_3(x, y, z) do { \
TIX4(x, S00, S01, S04, S07, S08, S22, S24, S27, S30); \
CMIX36(S33, S34, S35, S01, S02, S03, S15, S16, S17); \
SMIX(S33, S34, S35, S00); \
CMIX36(S30, S31, S32, S34, S35, S00, S12, S13, S14); \
SMIX(S30, S31, S32, S33); \
CMIX36(S27, S28, S29, S31, S32, S33, S09, S10, S11); \
SMIX(S27, S28, S29, S30); \
CMIX36(S24, S25, S26, S28, S29, S30, S06, S07, S08); \
SMIX(S24, S25, S26, S27); \
\
TIX4(y, S24, S25, S28, S31, S32, S10, S12, S15, S18); \
CMIX36(S21, S22, S23, S25, S26, S27, S03, S04, S05); \
SMIX(S21, S22, S23, S24); \
CMIX36(S18, S19, S20, S22, S23, S24, S00, S01, S02); \
SMIX(S18, S19, S20, S21); \
CMIX36(S15, S16, S17, S19, S20, S21, S33, S34, S35); \
SMIX(S15, S16, S17, S18); \
CMIX36(S12, S13, S14, S16, S17, S18, S30, S31, S32); \
SMIX(S12, S13, S14, S15); \
\
TIX4(z, S12, S13, S16, S19, S20, S34, S00, S03, S06); \
CMIX36(S09, S10, S11, S13, S14, S15, S27, S28, S29); \
SMIX(S09, S10, S11, S12); \
CMIX36(S06, S07, S08, S10, S11, S12, S24, S25, S26); \
SMIX(S06, S07, S08, S09); \
CMIX36(S03, S04, S05, S07, S08, S09, S21, S22, S23); \
SMIX(S03, S04, S05, S06); \
CMIX36(S00, S01, S02, S04, S05, S06, S18, S19, S20); \
SMIX(S00, S01, S02, S03); \
} while (0)


Loading

0 comments on commit 5136036

Please sign in to comment.