diff --git a/main.c b/main.c index 09d7c64..b648fa0 100644 --- a/main.c +++ b/main.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include "blake.h" #include "_kernel.h" @@ -31,6 +32,7 @@ uint64_t nr_nonces = 1; uint32_t do_list_devices = 0; uint32_t gpu_to_use = 0; uint32_t mining = 0; +double kern_avg_run_time = 0; typedef struct debug_s { @@ -113,6 +115,24 @@ void randomize(void *p, ssize_t l) fatal("close %s: %s\n", fname, strerror(errno)); } + +#define NSEC 1e-9 +double timespec_to_double(struct timespec *t) +{ + return ((double)t->tv_sec) + ((double) t->tv_nsec) * NSEC; +} + +void double_to_timespec(double dt, struct timespec *t) +{ + t->tv_sec = (long)dt; + t->tv_nsec = (long)((dt - t->tv_sec) / NSEC); +} + +void get_time(struct timespec *t) +{ + clock_gettime(CLOCK_MONOTONIC, t); +} + cl_mem check_clCreateBuffer(cl_context ctx, cl_mem_flags flags, size_t size, void *host_ptr) { @@ -776,21 +796,61 @@ uint32_t verify_sol(sols_t *sols, unsigned sol_i) */ uint32_t verify_sols(cl_command_queue queue, cl_mem buf_sols, uint64_t *nonce, uint8_t *header, size_t fixed_nonce_bytes, uint8_t *target, - char *job_id, uint32_t *shares) + char *job_id, uint32_t *shares, struct timespec *target_time) { sols_t *sols; uint32_t nr_valid_sols; sols = (sols_t *)malloc(sizeof (*sols)); if (!sols) - fatal("malloc: %s\n", strerror(errno)); + fatal("malloc: %s\n", strerror(errno)); + + cl_event readEvent; check_clEnqueueReadBuffer(queue, buf_sols, - CL_TRUE, // cl_bool blocking_read + CL_FALSE, // cl_bool blocking_read 0, // size_t offset sizeof (*sols), // size_t size sols, // void *ptr 0, // cl_uint num_events_in_wait_list NULL, // cl_event *event_wait_list - NULL); // cl_event *event + &readEvent); // cl_event *event + clFlush(queue); + + struct timespec start_time; + get_time(&start_time); + + double dtarget = timespec_to_double(target_time); + + cl_int readStatus; + clGetEventInfo(readEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), + &readStatus, NULL); + while(readStatus != CL_COMPLETE && SLEEP_SKIP_RATIO != 1) + { + struct timespec t; + get_time(&t); + double dt = timespec_to_double(&t); + double delta = dtarget - dt; + if (delta < 0) + break; + + double_to_timespec(delta * SLEEP_RECHECK_RATIO, &t); + nanosleep(&t, NULL); + + clGetEventInfo(readEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), + &readStatus, NULL); + } + clWaitForEvents(1, &readEvent); + + struct timespec end_time; + get_time(&end_time); + + double dstart, dend, delta; + dstart = timespec_to_double(&start_time); + dend = timespec_to_double(&end_time); + + delta = dend - dstart; + kern_avg_run_time = kern_avg_run_time * 6.0 / 10.0 + delta * ((4.0 / 10.0)); + kern_avg_run_time *= (1 - (double)SLEEP_SKIP_RATIO); + if (sols->nr > MAX_SOLS) { fprintf(stderr, "%d (probably invalid) solutions were dropped!\n", @@ -904,10 +964,20 @@ uint32_t solve_equihash(cl_context ctx, cl_command_queue queue, check_clSetKernelArg(k_sols, 3, &rowCounters[0]); check_clSetKernelArg(k_sols, 4, &rowCounters[1]); global_ws = NR_ROWS; + + struct timespec start_time, target_time; + get_time(&start_time); + + double dstart, dtarget = 0; + dstart = timespec_to_double(&start_time); + dtarget = dstart + kern_avg_run_time; + double_to_timespec(dtarget, &target_time); + check_clEnqueueNDRangeKernel(queue, k_sols, 1, NULL, &global_ws, &local_work_size, 0, NULL, NULL); + clFlush(queue); sol_found = verify_sols(queue, buf_sols, nonce_ptr, header, - fixed_nonce_bytes, target, job_id, shares); + fixed_nonce_bytes, target, job_id, shares, &target_time); clReleaseMemObject(buf_blake_st); return sol_found; } diff --git a/param.h b/param.h index e31784d..1f8095a 100644 --- a/param.h +++ b/param.h @@ -14,6 +14,13 @@ // Number of collision items to track, per thread #define COLL_DATA_SIZE_PER_TH (NR_SLOTS * 5) + +// Ratio of time of sleeping before rechecking if task is done (0-1) +#define SLEEP_RECHECK_RATIO 0.60 +// Ratio of time to busy wait for the solution (0-1) +// The higher value the higher CPU usage with Nvidia +#define SLEEP_SKIP_RATIO 0.005 + // Make hash tables OVERHEAD times larger than necessary to store the average // number of elements per row. The ideal value is as small as possible to // reduce memory usage, but not too small or else elements are dropped from the